Blame - build/get_syzygy_binaries.py - fp2-dev/platform/external/v8

blob: 1cab3fcf48dfd21f8c54e6492101f9d5df2e6300 [file] [log] [blame]

Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright 2014 The Chromium Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5
				6	"""A utility script for downloading versioned Syzygy binaries."""
				7
				8	import hashlib
				9	import errno
				10	import json
				11	import logging
				12	import optparse
				13	import os
				14	import re
				15	import shutil
				16	import stat
				17	import sys
				18	import subprocess
				19	import tempfile
				20	import time
				21	import zipfile
				22
				23
				24	_LOGGER = logging.getLogger(os.path.basename(__file__))
				25
				26	# The relative path where official builds are archived in their GS bucket.
				27	_SYZYGY_ARCHIVE_PATH = ('/builds/official/%(revision)s')
				28
				29	# A JSON file containing the state of the download directory. If this file and
				30	# directory state do not agree, then the binaries will be downloaded and
				31	# installed again.
				32	_STATE = '.state'
				33
				34	# This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
				35	# The archive exclusively uses lowercase GIT hashes.
				36	_REVISION_RE = re.compile('^(?:\d+\|[a-f0-9]{40})$')
				37
				38	# This matches an MD5 hash.
				39	_MD5_RE = re.compile('^[a-f0-9]{32}$')
				40
				41	# List of reources to be downloaded and installed. These are tuples with the
				42	# following format:
				43	# (basename, logging name, relative installation path, extraction filter)
				44	_RESOURCES = [
				45	('benchmark.zip', 'benchmark', '', None),
				46	('binaries.zip', 'binaries', 'exe', None),
				47	('symbols.zip', 'symbols', 'exe',
				48	lambda x: x.filename.endswith('.dll.pdb'))]
				49
				50
				51	def _LoadState(output_dir):
				52	"""Loads the contents of the state file for a given \|output_dir\|, returning
				53	None if it doesn't exist.
				54	"""
				55	path = os.path.join(output_dir, _STATE)
				56	if not os.path.exists(path):
				57	_LOGGER.debug('No state file found.')
				58	return None
				59	with open(path, 'rb') as f:
				60	_LOGGER.debug('Reading state file: %s', path)
				61	try:
				62	return json.load(f)
				63	except ValueError:
				64	_LOGGER.debug('Invalid state file.')
				65	return None
				66
				67
				68	def _SaveState(output_dir, state, dry_run=False):
				69	"""Saves the \|state\| dictionary to the given \|output_dir\| as a JSON file."""
				70	path = os.path.join(output_dir, _STATE)
				71	_LOGGER.debug('Writing state file: %s', path)
				72	if dry_run:
				73	return
				74	with open(path, 'wb') as f:
				75	f.write(json.dumps(state, sort_keys=True, indent=2))
				76
				77
				78	def _Md5(path):
				79	"""Returns the MD5 hash of the file at \|path\|, which must exist."""
				80	return hashlib.md5(open(path, 'rb').read()).hexdigest()
				81
				82
				83	def _StateIsValid(state):
				84	"""Returns true if the given state structure is valid."""
				85	if not isinstance(state, dict):
				86	_LOGGER.debug('State must be a dict.')
				87	return False
				88	r = state.get('revision', None)
				89	if not isinstance(r, basestring) or not _REVISION_RE.match(r):
				90	_LOGGER.debug('State contains an invalid revision.')
				91	return False
				92	c = state.get('contents', None)
				93	if not isinstance(c, dict):
				94	_LOGGER.debug('State must contain a contents dict.')
				95	return False
				96	for (relpath, md5) in c.iteritems():
				97	if not isinstance(relpath, basestring) or len(relpath) == 0:
				98	_LOGGER.debug('State contents dict contains an invalid path.')
				99	return False
				100	if not isinstance(md5, basestring) or not _MD5_RE.match(md5):
				101	_LOGGER.debug('State contents dict contains an invalid MD5 digest.')
				102	return False
				103	return True
				104
				105
				106	def _BuildActualState(stored, revision, output_dir):
				107	"""Builds the actual state using the provided \|stored\| state as a template.
				108	Only examines files listed in the stored state, causing the script to ignore
				109	files that have been added to the directories locally. \|stored\| must be a
				110	valid state dictionary.
				111	"""
				112	contents = {}
				113	state = { 'revision': revision, 'contents': contents }
				114	for relpath, md5 in stored['contents'].iteritems():
				115	abspath = os.path.abspath(os.path.join(output_dir, relpath))
				116	if os.path.isfile(abspath):
				117	m = _Md5(abspath)
				118	contents[relpath] = m
				119
				120	return state
				121
				122
				123	def _StatesAreConsistent(stored, actual):
				124	"""Validates whether two state dictionaries are consistent. Both must be valid
				125	state dictionaries. Additional entries in \|actual\| are ignored.
				126	"""
				127	if stored['revision'] != actual['revision']:
				128	_LOGGER.debug('Mismatched revision number.')
				129	return False
				130	cont_stored = stored['contents']
				131	cont_actual = actual['contents']
				132	for relpath, md5 in cont_stored.iteritems():
				133	if relpath not in cont_actual:
				134	_LOGGER.debug('Missing content: %s', relpath)
				135	return False
				136	if md5 != cont_actual[relpath]:
				137	_LOGGER.debug('Modified content: %s', relpath)
				138	return False
				139	return True
				140
				141
				142	def _GetCurrentState(revision, output_dir):
				143	"""Loads the current state and checks to see if it is consistent. Returns
				144	a tuple (state, bool). The returned state will always be valid, even if an
				145	invalid state is present on disk.
				146	"""
				147	stored = _LoadState(output_dir)
				148	if not _StateIsValid(stored):
				149	_LOGGER.debug('State is invalid.')
				150	# Return a valid but empty state.
				151	return ({'revision': '0', 'contents': {}}, False)
				152	actual = _BuildActualState(stored, revision, output_dir)
				153	# If the script has been modified consider the state invalid.
				154	path = os.path.join(output_dir, _STATE)
				155	if os.path.getmtime(__file__) > os.path.getmtime(path):
				156	return (stored, False)
				157	# Otherwise, explicitly validate the state.
				158	if not _StatesAreConsistent(stored, actual):
				159	return (stored, False)
				160	return (stored, True)
				161
				162
				163	def _DirIsEmpty(path):
				164	"""Returns true if the given directory is empty, false otherwise."""
				165	for root, dirs, files in os.walk(path):
				166	return not dirs and not files
				167
				168
				169	def _RmTreeHandleReadOnly(func, path, exc):
				170	"""An error handling function for use with shutil.rmtree. This will
				171	detect failures to remove read-only files, and will change their properties
				172	prior to removing them. This is necessary on Windows as os.remove will return
				173	an access error for read-only files, and git repos contain read-only
				174	pack/index files.
				175	"""
				176	excvalue = exc[1]
				177	if func in (os.rmdir, os.remove) and excvalue.errno == errno.EACCES:
				178	_LOGGER.debug('Removing read-only path: %s', path)
				179	os.chmod(path, stat.S_IRWXU \| stat.S_IRWXG \| stat.S_IRWXO)
				180	func(path)
				181	else:
				182	raise
				183
				184
				185	def _RmTree(path):
				186	"""A wrapper of shutil.rmtree that handles read-only files."""
				187	shutil.rmtree(path, ignore_errors=False, onerror=_RmTreeHandleReadOnly)
				188
				189
				190	def _CleanState(output_dir, state, dry_run=False):
				191	"""Cleans up files/directories in \|output_dir\| that are referenced by
				192	the given \|state\|. Raises an error if there are local changes. Returns a
				193	dictionary of files that were deleted.
				194	"""
				195	_LOGGER.debug('Deleting files from previous installation.')
				196	deleted = {}
				197
				198	# Generate a list of files to delete, relative to \|output_dir\|.
				199	contents = state['contents']
				200	files = sorted(contents.keys())
				201
				202	# Try to delete the files. Keep track of directories to delete as well.
				203	dirs = {}
				204	for relpath in files:
				205	fullpath = os.path.join(output_dir, relpath)
				206	fulldir = os.path.dirname(fullpath)
				207	dirs[fulldir] = True
				208	if os.path.exists(fullpath):
				209	# If somehow the file has become a directory complain about it.
				210	if os.path.isdir(fullpath):
				211	raise Exception('Directory exists where file expected: %s' % fullpath)
				212
				213	# Double check that the file doesn't have local changes. If it does
				214	# then refuse to delete it.
				215	if relpath in contents:
				216	stored_md5 = contents[relpath]
				217	actual_md5 = _Md5(fullpath)
				218	if actual_md5 != stored_md5:
				219	raise Exception('File has local changes: %s' % fullpath)
				220
				221	# The file is unchanged so it can safely be deleted.
				222	_LOGGER.debug('Deleting file "%s".', fullpath)
				223	deleted[relpath] = True
				224	if not dry_run:
				225	os.unlink(fullpath)
				226
				227	# Sort directories from longest name to shortest. This lets us remove empty
				228	# directories from the most nested paths first.
				229	dirs = sorted(dirs.keys(), key=lambda x: len(x), reverse=True)
				230	for p in dirs:
				231	if os.path.exists(p) and _DirIsEmpty(p):
				232	_LOGGER.debug('Deleting empty directory "%s".', p)
				233	if not dry_run:
				234	_RmTree(p)
				235
				236	return deleted
				237
				238
				239	def _FindGsUtil():
				240	"""Looks for depot_tools and returns the absolute path to gsutil.py."""
				241	for path in os.environ['PATH'].split(os.pathsep):
				242	path = os.path.abspath(path)
				243	git_cl = os.path.join(path, 'git_cl.py')
				244	gs_util = os.path.join(path, 'gsutil.py')
				245	if os.path.exists(git_cl) and os.path.exists(gs_util):
				246	return gs_util
				247	return None
				248
				249
				250	def _GsUtil(*cmd):
				251	"""Runs the given command in gsutil with exponential backoff and retries."""
				252	gs_util = _FindGsUtil()
				253	cmd = [sys.executable, gs_util] + list(cmd)
				254
				255	retries = 3
				256	timeout = 4 # Seconds.
				257	while True:
				258	_LOGGER.debug('Running %s', cmd)
				259	prog = subprocess.Popen(cmd, shell=False)
				260	prog.communicate()
				261
				262	# Stop retrying on success.
				263	if prog.returncode == 0:
				264	return
				265
				266	# Raise a permanent failure if retries have been exhausted.
				267	if retries == 0:
				268	raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
				269
				270	_LOGGER.debug('Sleeping %d seconds and trying again.', timeout)
				271	time.sleep(timeout)
				272	retries -= 1
				273	timeout *= 2
				274
				275
				276	def _Download(resource):
				277	"""Downloads the given GS resource to a temporary file, returning its path."""
				278	tmp = tempfile.mkstemp(suffix='syzygy_archive')
				279	os.close(tmp[0])
				280	url = 'gs://syzygy-archive' + resource
				281	_GsUtil('cp', url, tmp[1])
				282	return tmp[1]
				283
				284
				285	def _InstallBinaries(options, deleted={}):
				286	"""Installs Syzygy binaries. This assumes that the output directory has
				287	already been cleaned, as it will refuse to overwrite existing files."""
				288	contents = {}
				289	state = { 'revision': options.revision, 'contents': contents }
				290	archive_path = _SYZYGY_ARCHIVE_PATH % { 'revision': options.revision }
				291	if options.resources:
				292	resources = [(resource, resource, '', None)
				293	for resource in options.resources]
				294	else:
				295	resources = _RESOURCES
				296	for (base, name, subdir, filt) in resources:
				297	# Create the output directory if it doesn't exist.
				298	fulldir = os.path.join(options.output_dir, subdir)
				299	if os.path.isfile(fulldir):
				300	raise Exception('File exists where a directory needs to be created: %s' %
				301	fulldir)
				302	if not os.path.exists(fulldir):
				303	_LOGGER.debug('Creating directory: %s', fulldir)
				304	if not options.dry_run:
				305	os.makedirs(fulldir)
				306
				307	# Download and read the archive.
				308	resource = archive_path + '/' + base
				309	_LOGGER.debug('Retrieving %s archive at "%s".', name, resource)
				310	path = _Download(resource)
				311
				312	_LOGGER.debug('Unzipping %s archive.', name)
				313	with open(path, 'rb') as data:
				314	archive = zipfile.ZipFile(data)
				315	for entry in archive.infolist():
				316	if not filt or filt(entry):
				317	fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
				318	relpath = os.path.relpath(fullpath, options.output_dir)
				319	if os.path.exists(fullpath):
				320	# If in a dry-run take into account the fact that the file would
				321	# have been deleted.
				322	if options.dry_run and relpath in deleted:
				323	pass
				324	else:
				325	raise Exception('Path already exists: %s' % fullpath)
				326
				327	# Extract the file and update the state dictionary.
				328	_LOGGER.debug('Extracting "%s".', fullpath)
				329	if not options.dry_run:
				330	archive.extract(entry.filename, fulldir)
				331	md5 = _Md5(fullpath)
				332	contents[relpath] = md5
				333	if sys.platform == 'cygwin':
				334	os.chmod(fullpath, os.stat(fullpath).st_mode \| stat.S_IXUSR)
				335
				336	_LOGGER.debug('Removing temporary file "%s".', path)
				337	os.remove(path)
				338
				339	return state
				340
				341
				342	def _ParseCommandLine():
				343	"""Parses the command-line and returns an options structure."""
				344	option_parser = optparse.OptionParser()
				345	option_parser.add_option('--dry-run', action='store_true', default=False,
				346	help='If true then will simply list actions that would be performed.')
				347	option_parser.add_option('--force', action='store_true', default=False,
				348	help='Force an installation even if the binaries are up to date.')
				349	option_parser.add_option('--no-cleanup', action='store_true', default=False,
				350	help='Allow installation on non-Windows platforms, and skip the forced '
				351	'cleanup step.')
				352	option_parser.add_option('--output-dir', type='string',
				353	help='The path where the binaries will be replaced. Existing binaries '
				354	'will only be overwritten if not up to date.')
				355	option_parser.add_option('--overwrite', action='store_true', default=False,
				356	help='If specified then the installation will happily delete and rewrite '
				357	'the entire output directory, blasting any local changes.')
				358	option_parser.add_option('--revision', type='string',
				359	help='The SVN revision or GIT hash associated with the required version.')
				360	option_parser.add_option('--revision-file', type='string',
				361	help='A text file containing an SVN revision or GIT hash.')
				362	option_parser.add_option('--resource', type='string', action='append',
				363	dest='resources', help='A resource to be downloaded.')
				364	option_parser.add_option('--verbose', dest='log_level', action='store_const',
				365	default=logging.INFO, const=logging.DEBUG,
				366	help='Enables verbose logging.')
				367	option_parser.add_option('--quiet', dest='log_level', action='store_const',
				368	default=logging.INFO, const=logging.ERROR,
				369	help='Disables all output except for errors.')
				370	options, args = option_parser.parse_args()
				371	if args:
				372	option_parser.error('Unexpected arguments: %s' % args)
				373	if not options.output_dir:
				374	option_parser.error('Must specify --output-dir.')
				375	if not options.revision and not options.revision_file:
				376	option_parser.error('Must specify one of --revision or --revision-file.')
				377	if options.revision and options.revision_file:
				378	option_parser.error('Must not specify both --revision and --revision-file.')
				379
				380	# Configure logging.
				381	logging.basicConfig(level=options.log_level)
				382
				383	# If a revision file has been specified then read it.
				384	if options.revision_file:
				385	options.revision = open(options.revision_file, 'rb').read().strip()
				386	_LOGGER.debug('Parsed revision "%s" from file "%s".',
				387	options.revision, options.revision_file)
				388
				389	# Ensure that the specified SVN revision or GIT hash is valid.
				390	if not _REVISION_RE.match(options.revision):
				391	option_parser.error('Must specify a valid SVN or GIT revision.')
				392
				393	# This just makes output prettier to read.
				394	options.output_dir = os.path.normpath(options.output_dir)
				395
				396	return options
				397
				398
				399	def _RemoveOrphanedFiles(options):
				400	"""This is run on non-Windows systems to remove orphaned files that may have
				401	been downloaded by a previous version of this script.
				402	"""
				403	# Reconfigure logging to output info messages. This will allow inspection of
				404	# cleanup status on non-Windows buildbots.
				405	_LOGGER.setLevel(logging.INFO)
				406
				407	output_dir = os.path.abspath(options.output_dir)
				408
				409	# We only want to clean up the folder in 'src/third_party/syzygy', and we
				410	# expect to be called with that as an output directory. This is an attempt to
				411	# not start deleting random things if the script is run from an alternate
				412	# location, or not called from the gclient hooks.
				413	expected_syzygy_dir = os.path.abspath(os.path.join(
				414	os.path.dirname(__file__), '..', 'third_party', 'syzygy'))
				415	expected_output_dir = os.path.join(expected_syzygy_dir, 'binaries')
				416	if expected_output_dir != output_dir:
				417	_LOGGER.info('Unexpected output directory, skipping cleanup.')
				418	return
				419
				420	if not os.path.isdir(expected_syzygy_dir):
				421	_LOGGER.info('Output directory does not exist, skipping cleanup.')
				422	return
				423
				424	def OnError(function, path, excinfo):
				425	"""Logs error encountered by shutil.rmtree."""
				426	_LOGGER.error('Error when running %s(%s)', function, path, exc_info=excinfo)
				427
				428	_LOGGER.info('Removing orphaned files from %s', expected_syzygy_dir)
				429	if not options.dry_run:
				430	shutil.rmtree(expected_syzygy_dir, True, OnError)
				431
				432
				433	def main():
				434	options = _ParseCommandLine()
				435
				436	if options.dry_run:
				437	_LOGGER.debug('Performing a dry-run.')
				438
				439	# We only care about Windows platforms, as the Syzygy binaries aren't used
				440	# elsewhere. However, there was a short period of time where this script
				441	# wasn't gated on OS types, and those OSes downloaded and installed binaries.
				442	# This will cleanup orphaned files on those operating systems.
				443	if sys.platform not in ('win32', 'cygwin'):
				444	if options.no_cleanup:
				445	_LOGGER.debug('Skipping usual cleanup for non-Windows platforms.')
				446	else:
				447	return _RemoveOrphanedFiles(options)
				448
				449	# Load the current installation state, and validate it against the
				450	# requested installation.
				451	state, is_consistent = _GetCurrentState(options.revision, options.output_dir)
				452
				453	# Decide whether or not an install is necessary.
				454	if options.force:
				455	_LOGGER.debug('Forcing reinstall of binaries.')
				456	elif is_consistent:
				457	# Avoid doing any work if the contents of the directory are consistent.
				458	_LOGGER.debug('State unchanged, no reinstall necessary.')
				459	return
				460
				461	# Under normal logging this is the only only message that will be reported.
				462	_LOGGER.info('Installing revision %s Syzygy binaries.',
				463	options.revision[0:12])
				464
				465	# Clean up the old state to begin with.
				466	deleted = []
				467	if options.overwrite:
				468	if os.path.exists(options.output_dir):
				469	# If overwrite was specified then take a heavy-handed approach.
				470	_LOGGER.debug('Deleting entire installation directory.')
				471	if not options.dry_run:
				472	_RmTree(options.output_dir)
				473	else:
				474	# Otherwise only delete things that the previous installation put in place,
				475	# and take care to preserve any local changes.
				476	deleted = _CleanState(options.output_dir, state, options.dry_run)
				477
				478	# Install the new binaries. In a dry-run this will actually download the
				479	# archives, but it won't write anything to disk.
				480	state = _InstallBinaries(options, deleted)
				481
				482	# Build and save the state for the directory.
				483	_SaveState(options.output_dir, state, options.dry_run)
				484
				485
				486	if __name__ == '__main__':
				487	main()