blob: 67c21a2545f034469b4866ecd7cff0a07d214660 [file] [log] [blame]
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
Remy Bohmer7f7acfe2020-08-01 18:36:44 +020015import errno
Remy Bohmer16c13282020-09-10 10:38:04 +020016import json
17import os
18import re
Remy Bohmer7f7acfe2020-08-01 18:36:44 +020019import subprocess
Remy Bohmer16c13282020-09-10 10:38:04 +020020import sys
21import traceback
Mike Frysingeracf63b22019-06-13 02:24:21 -040022import urllib.parse
Remy Bohmer16c13282020-09-10 10:38:04 +020023
24from error import HookError
25from git_refs import HEAD
26
Remy Bohmer7f7acfe2020-08-01 18:36:44 +020027
class RepoHook(object):
  """A RepoHook contains information about a script to run as a hook.

  Hooks are used to run a python script before running an upload (for instance,
  to run presubmit checks).  Eventually, we may have hooks for other actions.

  This shouldn't be confused with files in the 'repo/hooks' directory.  Those
  files are copied into each '.git/hooks' folder for each project.  Repo-level
  hooks are associated instead with repo actions.

  Hooks are always python.  When a hook is run, we will load the hook into the
  interpreter and execute its main() function.

  Combinations of hook option flags:
  - no-verify=False, verify=False (DEFAULT):
    If stdout is a tty, can prompt about running hooks if needed.
    If user denies running hooks, the action is cancelled. If stdout is
    not a tty and we would need to prompt about hooks, action is
    cancelled.
  - no-verify=False, verify=True:
    Always run hooks with no prompt.
  - no-verify=True, verify=False:
    Never run hooks, but run action anyway (AKA bypass hooks).
  - no-verify=True, verify=True:
    Invalid
  """

  def __init__(self,
               hook_type,
               hooks_project,
               repo_topdir,
               manifest_url,
               bypass_hooks=False,
               allow_all_hooks=False,
               ignore_hooks=False,
               abort_if_user_denies=False):
    """RepoHook constructor.

    Params:
      hook_type: A string representing the type of hook.  This is also used
          to figure out the name of the file containing the hook.  For
          example: 'pre-upload'.
      hooks_project: The project containing the repo hooks.
          If you have a manifest, this is manifest.repo_hooks_project.
          OK if this is None, which will make the hook a no-op.
      repo_topdir: The top directory of the repo client checkout.
          This is the one containing the .repo directory. Scripts will
          run with CWD as this directory.
          If you have a manifest, this is manifest.topdir.
      manifest_url: The URL to the manifest git repo.
      bypass_hooks: If True, then 'Do not run the hook'.
      allow_all_hooks: If True, then 'Run the hook without prompting'.
      ignore_hooks: If True, then 'Do not abort action if hooks fail'.
      abort_if_user_denies: If True, we'll abort running the hook if the user
          doesn't allow us to run the hook.
    """
    self._hook_type = hook_type
    self._hooks_project = hooks_project
    self._repo_topdir = repo_topdir
    self._manifest_url = manifest_url
    self._bypass_hooks = bypass_hooks
    self._allow_all_hooks = allow_all_hooks
    self._ignore_hooks = ignore_hooks
    self._abort_if_user_denies = abort_if_user_denies

    # Store the full path to the script for convenience.
    if self._hooks_project:
      self._script_fullpath = os.path.join(self._hooks_project.worktree,
                                           self._hook_type + '.py')
    else:
      self._script_fullpath = None

  def _GetHash(self):
    """Return a hash of the contents of the hooks directory.

    We'll just use git to do this.  This hash has the property that if anything
    changes in the directory we will return a different hash.

    SECURITY CONSIDERATION:
      This hash only represents the contents of files in the hook directory, not
      any other files imported or called by hooks.  Changes to imported files
      can change the script behavior without affecting the hash.

    Returns:
      A string representing the hash.  This will always be ASCII so that it can
      be printed to the user easily.
    """
    assert self._hooks_project, "Must have hooks to calculate their hash."

    # We will use the work_git object rather than just calling GetRevisionId().
    # That gives us a hash of the latest checked in version of the files that
    # the user will actually be executing.  Specifically, GetRevisionId()
    # doesn't appear to change even if a user checks out a different version
    # of the hooks repo (via git checkout) nor if a user commits their own revs.
    #
    # NOTE: Local (non-committed) changes will not be factored into this hash.
    # I think this is OK, since we're really only worried about warning the user
    # about upstream changes.
    return self._hooks_project.work_git.rev_parse(HEAD)

  def _GetMustVerb(self):
    """Return 'must' if the hook is required; 'should' if not."""
    if self._abort_if_user_denies:
      return 'must'
    else:
      return 'should'

  def _CheckForHookApproval(self):
    """Check to see whether this hook has been approved.

    We'll accept approval of manifest URLs if they're using secure transports.
    This way the user can say they trust the manifest hoster.  For insecure
    hosts, we fall back to checking the hash of the hooks repo.

    Note that we ask permission for each individual hook even though we use
    the hash of all hooks when detecting changes.  We'd like the user to be
    able to approve / deny each hook individually.  We only use the hash of all
    hooks because there is no other easy way to detect changes to local imports.

    Returns:
      True if this hook is approved to run; False otherwise.

    Raises:
      HookError: Raised if the user doesn't approve and abort_if_user_denies
          was passed to the constructor.
    """
    if self._ManifestUrlHasSecureScheme():
      return self._CheckForHookApprovalManifest()
    else:
      return self._CheckForHookApprovalHash()

  def _CheckForHookApprovalHelper(self, subkey, new_val, main_prompt,
                                  changed_prompt):
    """Check for approval for a particular attribute and hook.

    Args:
      subkey: The git config key under [repo.hooks.<hook_type>] to store the
          last approved string.
      new_val: The new value to compare against the last approved one.
      main_prompt: Message to display to the user to ask for approval.
      changed_prompt: Message explaining why we're re-asking for approval.

    Returns:
      True if this hook is approved to run; False otherwise.

    Raises:
      HookError: Raised if the user doesn't approve and abort_if_user_denies
          was passed to the constructor.
    """
    hooks_config = self._hooks_project.config
    git_approval_key = 'repo.hooks.%s.%s' % (self._hook_type, subkey)

    # Get the last value that the user approved for this hook; may be None.
    old_val = hooks_config.GetString(git_approval_key)

    if old_val is not None:
      # User previously approved hook and asked not to be prompted again.
      if new_val == old_val:
        # Approval matched.  We're done.
        return True
      else:
        # Give the user a reason why we're prompting, since they last told
        # us to "never ask again".
        prompt = 'WARNING: %s\n\n' % (changed_prompt,)
    else:
      prompt = ''

    # Prompt the user only if we're on a tty; when we're not on a tty there
    # is nobody to ask, so we fall through and assume "no".
    if sys.stdout.isatty():
      prompt += main_prompt + ' (yes/always/NO)? '
      response = input(prompt).lower()
      print()

      # User is doing a one-time approval.
      if response in ('y', 'yes'):
        return True
      elif response == 'always':
        # Remember the approved value so we don't ask again until it changes.
        hooks_config.SetString(git_approval_key, new_val)
        return True

    # For anything else, we'll assume no approval.
    if self._abort_if_user_denies:
      raise HookError('You must allow the %s hook or use --no-verify.' %
                      self._hook_type)

    return False

  def _ManifestUrlHasSecureScheme(self):
    """Check if the URI for the manifest is a secure transport."""
    secure_schemes = ('file', 'https', 'ssh', 'persistent-https', 'sso', 'rpc')
    parse_results = urllib.parse.urlparse(self._manifest_url)
    return parse_results.scheme in secure_schemes

  def _CheckForHookApprovalManifest(self):
    """Check whether the user has approved this manifest host.

    Returns:
      True if this hook is approved to run; False otherwise.
    """
    return self._CheckForHookApprovalHelper(
        'approvedmanifest',
        self._manifest_url,
        'Run hook scripts from %s' % (self._manifest_url,),
        'Manifest URL has changed since %s was allowed.' % (self._hook_type,))

  def _CheckForHookApprovalHash(self):
    """Check whether the user has approved the hooks repo.

    Returns:
      True if this hook is approved to run; False otherwise.
    """
    prompt = ('Repo %s run the script:\n'
              ' %s\n'
              '\n'
              'Do you want to allow this script to run')
    return self._CheckForHookApprovalHelper(
        'approvedhash',
        self._GetHash(),
        prompt % (self._GetMustVerb(), self._script_fullpath),
        'Scripts have changed since %s was allowed.' % (self._hook_type,))

  @staticmethod
  def _ExtractInterpFromShebang(data):
    """Extract the interpreter used in the shebang.

    Try to locate the interpreter the script is using (ignoring `env`).

    Args:
      data: The file content of the script.

    Returns:
      The main script interpreter as written in the shebang (a bare program
      name when invoked through `env`, otherwise the path given), or None if
      a shebang is not used or could not be parsed out.
    """
    firstline = data.splitlines()[:1]
    if not firstline:
      return None

    # The format here can be tricky.
    shebang = firstline[0].strip()
    m = re.match(r'^#!\s*([^\s]+)(?:\s+([^\s]+))?', shebang)
    if not m:
      return None

    # If using `env`, find the target program (None if `env` has no argument).
    interp = m.group(1)
    if os.path.basename(interp) == 'env':
      interp = m.group(2)

    return interp

  def _ExecuteHookViaReexec(self, interp, context, **kwargs):
    """Execute the hook script through |interp|.

    Note: Support for this feature should be dropped ~Jun 2021.

    Args:
      interp: The Python program to run.
      context: Basic Python context to execute the hook inside.  Must be
          JSON-serializable, as it is handed to the child as JSON.
      kwargs: Arbitrary arguments to pass to the hook script.  Must be
          JSON-serializable for the same reason.

    Raises:
      HookError: When the hooks failed for any reason.
      OSError: When |interp| could not be started (e.g. it does not exist).
    """
    # This logic needs to be kept in sync with _ExecuteHookViaImport below.
    #
    # The values are embedded with %r so that they arrive as properly escaped
    # Python string literals.  Pasting them between ''' quotes would let the
    # child interpreter reinterpret backslash escapes (corrupting the JSON or
    # a path containing backslashes) and would break outright on any value
    # containing a triple quote.
    script = """
import json, os, sys
path = %(path)r
kwargs = json.loads(%(kwargs)r)
context = json.loads(%(context)r)
sys.path.insert(0, os.path.dirname(path))
data = open(path).read()
exec(compile(data, path, 'exec'), context)
context['main'](**kwargs)
""" % {
        'path': self._script_fullpath,
        'kwargs': json.dumps(kwargs),
        'context': json.dumps(context),
    }

    # We pass the script via stdin to avoid OS argv limits.  It also makes
    # unhandled exception tracebacks less verbose/confusing for users.
    cmd = [interp, '-c', 'import sys; exec(sys.stdin.read())']
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    proc.communicate(input=script.encode('utf-8'))
    if proc.returncode:
      raise HookError('Failed to run %s hook.' % (self._hook_type,))

  def _ExecuteHookViaImport(self, data, context, **kwargs):
    """Execute the hook code in |data| directly.

    Args:
      data: The code of the hook to execute.
      context: Basic Python context to execute the hook inside.
      kwargs: Arbitrary arguments to pass to the hook script.

    Raises:
      HookError: When the hooks failed for any reason.
    """
    # Exec, storing global context in the context dict.  We catch exceptions
    # and convert to a HookError w/ just the failing traceback.
    try:
      exec(compile(data, self._script_fullpath, 'exec'), context)
    except Exception:
      raise HookError('%s\nFailed to import %s hook; see traceback above.' %
                      (traceback.format_exc(), self._hook_type))

    # Running the script should have defined a main() function.
    if 'main' not in context:
      raise HookError('Missing main() in: "%s"' % self._script_fullpath)

    # Call the main function in the hook.  If the hook should cause the
    # build to fail, it will raise an Exception.  We'll catch that and convert
    # it to a HookError w/ just the failing traceback.
    try:
      context['main'](**kwargs)
    except Exception:
      raise HookError('%s\nFailed to run main() for %s hook; see traceback '
                      'above.' % (traceback.format_exc(), self._hook_type))

  def _ExecuteHook(self, **kwargs):
    """Actually execute the given hook.

    This will run the hook's 'main' function, either in our python interpreter
    or, when the hook's shebang asks for a different major Python version, in
    that interpreter via _ExecuteHookViaReexec.

    Args:
      kwargs: Keyword arguments to pass to the hook.  These are often specific
          to the hook type.  For instance, pre-upload hooks will contain
          a project_list.

    Raises:
      HookError: When the hook failed for any reason.
    """
    # Keep sys.path and CWD stashed away so that we can always restore them
    # upon function exit.
    orig_path = os.getcwd()
    orig_syspath = sys.path

    try:
      # Always run hooks with CWD as topdir.
      os.chdir(self._repo_topdir)

      # Put the hook dir as the first item of sys.path so hooks can do
      # relative imports.  We want to replace the repo dir as [0] so
      # hooks can't import repo files.
      sys.path = [os.path.dirname(self._script_fullpath)] + sys.path[1:]

      # Initial global context for the hook to run within.
      context = {'__file__': self._script_fullpath}

      # Add 'hook_should_take_kwargs' to the arguments to be passed to main.
      # We don't actually want hooks to define their main with this argument--
      # it's there to remind them that their hook should always take **kwargs.
      # For instance, a pre-upload hook should be defined like:
      #   def main(project_list, **kwargs):
      #
      # This allows us to later expand the API without breaking old hooks.
      kwargs = kwargs.copy()
      kwargs['hook_should_take_kwargs'] = True

      # See what version of python the hook has been written against.
      # Use a context manager so the file handle is closed promptly.
      with open(self._script_fullpath) as hook_file:
        data = hook_file.read()
      interp = self._ExtractInterpFromShebang(data)
      reexec = False
      if interp:
        prog = os.path.basename(interp)
        if prog.startswith('python2') and sys.version_info.major != 2:
          reexec = True
        elif prog.startswith('python3') and sys.version_info.major == 2:
          reexec = True

      # Attempt to execute the hooks through the requested version of Python.
      if reexec:
        try:
          self._ExecuteHookViaReexec(interp, context, **kwargs)
        except OSError as e:
          if e.errno == errno.ENOENT:
            # We couldn't find the interpreter, so fallback to importing.
            reexec = False
          else:
            raise

      # Run the hook by importing directly.
      if not reexec:
        self._ExecuteHookViaImport(data, context, **kwargs)
    finally:
      # Restore sys.path and CWD.
      sys.path = orig_syspath
      os.chdir(orig_path)

  def _CheckHook(self):
    """Verify that the hook script exists on disk.

    Raises:
      HookError: When the hook script file cannot be found.
    """
    # Bail with a nice error if we can't find the hook.
    if not os.path.isfile(self._script_fullpath):
      raise HookError('Couldn\'t find repo hook: %s' % self._script_fullpath)

  def Run(self, **kwargs):
    """Run the hook.

    If the hook doesn't exist (because there is no hooks project or because
    this particular hook is not enabled), this is a no-op.

    Args:
      kwargs: Keyword arguments to pass to the hook.  These are often specific
          to the hook type.  For instance, pre-upload hooks will contain
          a project_list.

    Returns:
      True: On success or ignore hooks by user-request
      False: The hook failed. The caller should respond with aborting the action.
        Some examples in which False is returned:
        * Finding the hook failed while it was enabled, or
        * the user declined to run a required hook (from _CheckForHookApproval)
        In all these cases the user did not pass the proper arguments to
        ignore the result through the option combinations as listed in
        AddOptionGroup().
    """
    # Do not do anything in case bypass_hooks is set, or
    # no-op if there is no hooks project or if hook is disabled.
    if (self._bypass_hooks or
        not self._hooks_project or
        self._hook_type not in self._hooks_project.enabled_repo_hooks):
      return True

    passed = True
    try:
      self._CheckHook()

      # Make sure the user is OK with running the hook.
      if self._allow_all_hooks or self._CheckForHookApproval():
        # Run the hook with the same version of python we're using.
        self._ExecuteHook(**kwargs)
    except SystemExit as e:
      # The hook called sys.exit(); report it as a failure instead of
      # letting it terminate repo itself.
      passed = False
      print('ERROR: %s hooks exited with exit code: %s' % (self._hook_type, str(e)),
            file=sys.stderr)
    except HookError as e:
      passed = False
      print('ERROR: %s' % str(e), file=sys.stderr)

    if not passed and self._ignore_hooks:
      print('\nWARNING: %s hooks failed, but continuing anyways.' % self._hook_type,
            file=sys.stderr)
      passed = True

    return passed

  @classmethod
  def FromSubcmd(cls, manifest, opt, *args, **kwargs):
    """Method to construct the repo hook class

    Args:
      manifest: The current active manifest for this command from which we
          extract a couple of fields.
      opt: Contains the commandline options for the action of this hook.
          It should contain the options added by AddOptionGroup() in which
          we are interested in RepoHook execution.
      args: Extra positional arguments forwarded to the constructor.
      kwargs: Extra keyword arguments forwarded to the constructor.  Explicit
          values for the hook option flags take precedence over |opt|.

    Returns:
      A new instance of |cls|.
    """
    for key in ('bypass_hooks', 'allow_all_hooks', 'ignore_hooks'):
      kwargs.setdefault(key, getattr(opt, key))
    kwargs.update({
        'hooks_project': manifest.repo_hooks_project,
        'repo_topdir': manifest.topdir,
        'manifest_url': manifest.manifestProject.GetRemote('origin').url,
    })
    return cls(*args, **kwargs)

  @staticmethod
  def AddOptionGroup(parser, name):
    """Help options relating to the various hooks.

    Args:
      parser: The option parser to add the hook option group to.
      name: The hook name used in the group title and help strings.
    """

    # Note that verify and no-verify are NOT opposites of each other, which
    # is why they store to different locations. We are using them to match
    # 'git commit' syntax.
    group = parser.add_option_group(name + ' hooks')
    group.add_option('--no-verify',
                     dest='bypass_hooks', action='store_true',
                     help='Do not run the %s hook.' % name)
    group.add_option('--verify',
                     dest='allow_all_hooks', action='store_true',
                     help='Run the %s hook without prompting.' % name)
    # The dest is spelled out (it matches what optparse would derive) so that
    # all three options visibly line up with the keys read in FromSubcmd().
    group.add_option('--ignore-hooks',
                     dest='ignore_hooks', action='store_true',
                     help='Do not abort if %s hooks fail.' % name)