Blame - client/common_lib/packages.py - platform/external/autotest

blob: d76b3165039db11eedbf1403f3154250bb259b6f [file] [log] [blame]

showard	9d02fb5	2008-08-08 18:20:37 +0000	[diff] [blame^]	1	#!/usr/bin/python
				2
				3	"""
				4	This module defines the BasePackageManager Class which provides an
				5	implementation of the packaging system API providing methods to fetch,
				6	upload and remove packages. Site specific extensions to any of these methods
				7	should inherit this class.
				8	"""
				9
				10	import re, os, sys, traceback, subprocess, shutil, time, traceback, urlparse
				11	import fcntl
				12	from autotest_lib.client.common_lib import error, utils
				13
				14
				15	class PackageUploadError(error.AutotestError):
				16	'Raised when there is an error uploading the package'
				17
				18	class PackageFetchError(error.AutotestError):
				19	'Raised when there is an error fetching the package'
				20
				21	class PackageRemoveError(error.AutotestError):
				22	'Raised when there is an error removing the package'
				23
				24	class PackageInstallError(error.AutotestError):
				25	'Raised when there is an error installing the package'
				26
				27	# the name of the checksum file that stores the packages' checksums
				28	CHECKSUM_FILE = "packages.checksum"
				29
				30	class BasePackageManager(object):
				31	_repo_exception = {}
				32	REPO_OK = object()
				33
    def __init__(self, pkgmgr_dir, repo_urls=None, upload_paths=None,
                 do_locking=True, run_function=utils.run,
                 run_function_args=None, run_function_dargs=None):
        '''
        Set up the package manager.

        pkgmgr_dir   : A directory that can be used by the package manager
                       to dump stuff (like checksum files of the
                       repositories etc.).
        repo_urls    : The list of the repository urls which is consulted
                       whilst fetching the package.
        upload_paths : The list of the upload paths of the repositories to
                       which the package is uploaded to.
        do_locking   : Enable locking when the packages are installed.
        run_function : Callable used to execute the commands throughout
                       this file. It defaults to utils.run() but a custom
                       method (if provided) should be of the same schema as
                       utils.run. It should return a CmdResult object and
                       throw a CmdError exception. The reason for using a
                       separate function to run the commands is that the
                       same code can be run to fetch a package on the local
                       machine or on a remote machine (in which case
                       ssh_host's run function is passed in for
                       run_function).
        run_function_args  : Extra positional arguments handed to
                             run_function on every call (default: none).
        run_function_dargs : Extra keyword arguments handed to run_function
                             on every call (default: none).
        '''
        # None sentinels instead of mutable default arguments, so that no
        # list/dict object is shared between independent instances.
        if run_function_args is None:
            run_function_args = []
        if run_function_dargs is None:
            run_function_dargs = {}

        # In memory dictionary that stores the checksum's of packages
        self._checksum_dict = {}

        self.pkgmgr_dir = pkgmgr_dir
        self.do_locking = do_locking

        # Keep private copies of the repository URLs and the upload paths
        self.repo_urls = list(repo_urls or [])
        self.upload_paths = list(upload_paths or [])

        def _run_command(command, _run_command_args=run_function_args,
                         _run_command_dargs=None):
            '''
            Special internal function that takes in a command as
            argument and passes it on to run_function.
            The _run_command_dargs are merged into run_function_dargs
            with the former having more precedence than the latter.
            '''
            new_dargs = dict(run_function_dargs)
            if _run_command_dargs:
                new_dargs.update(_run_command_dargs)

            return run_function(command, *_run_command_args, **new_dargs)

        self._run_command = _run_command
				88
				89
    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
                    preserve_install_dir=False, repo_url=None):
        '''
        Fetch the package tarball into fetch_dir and untar it into
        install_dir. Nothing is untarred when install_dir already holds the
        contents of the very same tarball (judged by its .checksum file).
        Otherwise install_dir is removed and recreated first, unless
        preserve_install_dir is specified as True.
        The assumption is that packages are of the form :
        <pkg_type>-<pkg_name>.tar.bz2

        name        : name of the package
        pkg_type    : type of the package
        fetch_dir   : The directory into which the package tarball will be
                      fetched to.
        install_dir : the directory where the package files will be
                      untarred to
        preserve_install_dir : keep the current contents of install_dir
                      instead of wiping it before the untar
        repo_url    : the url of the repository to fetch the package from.
                      When None all the configured repositories are tried.

        Raises PackageInstallError when the package cannot be fetched.
        '''
        # do_locking flag is on by default unless you disable it (typically
        # in the cases where packages are directly installed from the server
        # onto the client in which case fcntl stuff wont work as the code
        # will run on the server in that case..
        lockfile = None
        if self.do_locking:
            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')

        try:
            if lockfile is not None:
                fcntl.flock(lockfile, fcntl.LOCK_EX)

            self._run_command('mkdir -p %s' % fetch_dir)

            pkg_name = self.get_tarball_name(name, pkg_type)
            fetch_path = os.path.join(fetch_dir, pkg_name)
            try:
                # Fetch the package into fetch_dir, honouring the repository
                # requested by the caller (if any)
                self.fetch_pkg(pkg_name, fetch_path, repo_url=repo_url)

                # check to see if the install_dir exists and if it does
                # then check to see if the .checksum file is the latest
                try:
                    self._run_command("ls %s" % install_dir)
                except (error.CmdError, error.AutoservRunError):
                    install_dir_exists = False
                else:
                    install_dir_exists = True

                if (install_dir_exists and
                        not self.untar_required(fetch_path, install_dir)):
                    return

                # untar the package into install_dir and
                # update the checksum in that directory
                if not preserve_install_dir:
                    # Make sure we clean up the install_dir
                    self._run_command('rm -rf %s' % install_dir)
                self._run_command('mkdir -p %s' % install_dir)

                self.untar_pkg(fetch_path, install_dir)

            except PackageFetchError as why:
                raise PackageInstallError('Installation of %s(type:%s) failed'
                                          ' : %s' % (name, pkg_type, why))
        finally:
            # Keyed on the handle itself so the lock taken above is always
            # the one that gets released and closed.
            if lockfile is not None:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()
				155
				156
    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=True):
        '''
        Fetch the package into dest_path. By default repo_url is None and
        the package is looked for in all the repositories specified,
        otherwise it is fetched from that specific repo_url only.

        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
                       dep-gcc.tar.bz2, kernel.1-1.rpm)
        dest_path    : complete path of where the package will be fetched to.
        repo_url     : the URL of the repository where the package is
                       located.
        use_checksum : This is set to False to fetch the packages.checksum
                       file so that the checksum comparison is bypassed for
                       the checksum file itself. This is used internally by
                       the packaging system. It should be ignored by
                       external callers of this method who use it to fetch
                       custom packages.

        Raises PackageFetchError when the destination directory is missing,
        when no repository is known or when no repository has the package.
        '''
        run_errors = (error.CmdError, error.AutoservRunError)

        # The directory that is going to hold the package must exist
        try:
            self._run_command("ls %s" % os.path.dirname(dest_path))
        except run_errors:
            raise PackageFetchError("Please provide a valid "
                                    "destination: %s " % dest_path)

        # See if the package was already fetched earlier, if so
        # the checksums need to be compared and the package is now
        # fetched only if they differ.
        try:
            self._run_command("ls %s" % dest_path)
        except run_errors:
            pkg_exists = False
        else:
            pkg_exists = True

        # An explicitly provided repository wins over the configured ones
        if repo_url:
            repo_url_list = [repo_url]
        elif self.repo_urls:
            repo_url_list = self.repo_urls
        else:
            raise PackageFetchError("There are no repository urls specified")

        error_msgs = {}
        for location in repo_url_list:
            try:
                if use_checksum:
                    # Fetch only if the package is absent or its checksum
                    # differs from the recorded one
                    up_to_date = (pkg_exists and
                                  self.compare_checksum(dest_path, location))
                    if not up_to_date:
                        self.fetch_pkg_file(pkg_name, dest_path, location)
                        # Record the checksum of what was just fetched
                        self.update_checksum(dest_path)
                else:
                    # Checksum handling bypassed: always fetch
                    self.fetch_pkg_file(pkg_name, dest_path, location)
                return
            except (PackageFetchError, error.AutoservRunError) as e:
                # The package could not be found in this repo, continue looking
                error_msgs[location] = str(e)
                sys.stderr.write('Package - could not be fetched from '
                                 '- %s : %s' % (location, e) + '\n')

        # if we got here then that means the package is not found
        # in any of the repositories.
        raise PackageFetchError("Package could not be fetched from any of"
                                " the repos %s : %s " % (repo_url_list,
                                                         error_msgs))
				225
				226
    def fetch_pkg_file(self, file_name, dest_path, source_url):
        """
        Fetch file_name from the repository at source_url into dest_path,
        dispatching on the scheme of source_url. Only 'http://' locations
        are handled; anything else raises PackageFetchError.
        """
        if not source_url.startswith('http://'):
            raise PackageFetchError("Invalid location specified")
        self.fetch_file_http(file_name, dest_path, source_url)
				237
				238
    def fetch_file_http(self, file_name, dest_path, source_url):
        """
        Download file_name from source_url into dest_path with wget, after
        checking that the server can be reached at all.
        Raises a PackageFetchError when the download command fails.
        """
        # check to see if the source_url is reachable or not
        self.run_http_test(source_url, os.path.dirname(dest_path))

        pkg_path = os.path.join(source_url, file_name)
        wget_cmd = 'wget %s -O %s' % (pkg_path, dest_path)
        try:
            self._run_command(wget_cmd)
        except error.CmdError as e:
            raise PackageFetchError("Package - %s not found in %s: %s"
                                    % (file_name, source_url, e))
				252
				253
				254	def run_http_test(self, source_url, dest_dir):
				255	'''
				256	Run a simple 30 sec wget on source_url
				257	just to see if it can be reachable or not. This avoids the need
				258	for waiting for a 10min timeout.
				259	'''
				260	dest_file_path = os.path.join(dest_dir, 'http_test_file')
				261
				262	BPM = BasePackageManager
				263	error_msg = "HTTP test failed. Failed to contact"
				264	# We should never get here unless the source_url starts with http://
				265	assert(source_url.startswith('http://'))
				266
				267	# Get the http server name from the URL
				268	server_name = urlparse.urlparse(source_url)[1]
				269	http_cmd = 'printf "GET / HTTP/1.0\n\n" \| nc %s 80' % server_name
				270
				271	if server_name in BPM._repo_exception:
				272	if BPM._repo_exception[server_name] == BPM.REPO_OK:
				273	# This repository is fine. Simply return
				274	return
				275	else:
				276	raise PackageFetchError("%s - %s : %s "
				277	% (error_msg, server_name,
				278	BPM._repo_exception[server_name]))
				279	try:
				280	try:
				281	self._run_command(http_cmd,
				282	_run_command_dargs={'timeout':30})
				283	BPM._repo_exception[server_name] = BPM.REPO_OK
				284	finally:
				285	self._run_command('rm -f %s' % dest_file_path)
				286	except error.CmdError, e:
				287	BPM._repo_exception[server_name] = e
				288	raise PackageFetchError("%s - %s: %s " % (error_msg,
				289	server_name, e))
				290
				291
				292
    # TODO(aganti): Fix the bug with the current checksum logic where
    # packages' checksums that are not present consistently in all the
    # repositories are not handled properly. This is a corner case though
    # but the ideal solution is to make the checksum file repository specific
    # and then maintain it.
    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False):
        '''
        Upload the package to the given upload_path or, when none is given,
        to all the configured upload paths. The checksum file is uploaded
        next to it when update_checksum is set.

        pkg_path        : The complete path to the package file
        upload_path     : the absolute path where the files are copied to.
                          if set to 'None' assumes 'all' repos
        update_checksum : If set to False (the default) the checksum file is
                          neither updated nor uploaded. This is necessary
                          for custom packages (like custom kernels and
                          custom tests) that get uploaded which do not need
                          to be part of the checksum file and bloat it.

        Raises PackageUploadError when there is nowhere to upload to.
        '''
        to_upload = [pkg_path]
        if update_checksum:
            # record the current package's checksum in the packages'
            # checksum file, which then travels along with the package
            to_upload.append(self._get_checksum_file_path())
            self.update_checksum(pkg_path)

        if upload_path:
            destinations = [upload_path]
        elif self.upload_paths:
            destinations = self.upload_paths
        else:
            raise PackageUploadError("Invalid Upload Path specified")

        # upload the package (and the checksum file) to every destination
        for destination in destinations:
            for file_path in to_upload:
                self.upload_pkg_file(file_path, destination)
				330
				331
    def upload_pkg_file(self, file_path, upload_path):
        '''
        Upload a single file by copying it into the upload_path directory
        and making the copy world readable/executable (mode 0755).
        Currently only a plain copy is done (other protocols could be added
        here), which assumes that the web server is running on the same
        machine where the method is being called from and that it serves
        the files found in upload_path.

        Raises PackageUploadError when the copy or the chmod fails.
        '''
        uploaded_file = os.path.join(upload_path, os.path.basename(file_path))
        try:
            shutil.copy(file_path, upload_path)
            os.chmod(uploaded_file, 0o755)
        except (IOError, os.error) as why:
            raise PackageUploadError("Upload of %s to %s failed: %s"
                                     % (file_path, upload_path, why))
				348
				349
				350	def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
				351	'''
				352	Remove the package from the specified remove_path
				353	pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
				354	dep-gcc.tar.bz2)
				355	remove_path : the location to remove the package from.
				356
				357	'''
				358	if remove_path:
				359	remove_path_list = [remove_path]
				360	elif len(self.upload_paths) > 0:
				361	remove_path_list = self.upload_paths
				362	else:
				363	raise PackageRemoveError("Invalid path to remove the pkg from")
				364
				365	checksum_path = self._get_checksum_file_path()
				366
				367	if remove_checksum:
				368	self.remove_checksum(pkg_name)
				369
				370	# remove the package and upload the checksum file to the repos
				371	for path in remove_path_list:
				372	self.remove_pkg_file(pkg_name, path)
				373	self.upload_pkg_file(checksum_path, path)
				374
				375
    def remove_pkg_file(self, file_name, pkg_dir):
        '''
        Delete the file called file_name that lives in the pkg_dir
        directory. Raises PackageRemoveError when it cannot be deleted.
        '''
        doomed_path = os.path.join(pkg_dir, file_name)
        try:
            os.remove(doomed_path)
        except (IOError, os.error) as why:
            raise PackageRemoveError("Could not remove %s from %s: %s "
                                     % (file_name, pkg_dir, why))
				386
				387
    def _get_checksum_file_path(self):
        '''
        Return the complete path of the checksum file, i.e. the file named
        CHECKSUM_FILE inside self.pkgmgr_dir.
        '''
        checksum_file_path = os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
        return checksum_file_path
				394
				395
    def _get_checksum_dict(self):
        '''
        Return the dictionary mapping package names to their checksums.

        While the in memory dictionary is still empty, the checksum file
        (assumed to live in self.pkgmgr_dir) is looked up and, when it is
        not there yet, fetched from the repositories; its contents are then
        parsed into the in memory dictionary. Every line of the file is
        expected to read '<checksum> <package name>'; lines that do not
        have both fields (e.g. blank lines) are skipped.

        An empty dictionary is returned when the checksum file cannot be
        fetched from any repository or when it is empty.
        '''
        checksum_path = self._get_checksum_file_path()
        if not self._checksum_dict:
            # Fetch the checksum file
            try:
                try:
                    self._run_command("ls %s" % checksum_path)
                except (error.CmdError, error.AutoservRunError):
                    # The packages checksum file does not exist locally.
                    # See if it is present in the repositories.
                    self.fetch_pkg(CHECKSUM_FILE, checksum_path,
                                   use_checksum=False)
            except PackageFetchError:
                # This should not happen whilst fetching a package..if a
                # package is present in the repository, the corresponding
                # checksum file should also be automatically present. This
                # case happens only when a package
                # is being uploaded and if it is the first package to be
                # uploaded to the repos (hence no checksum file created yet)
                # Return an empty dictionary in that case
                return {}

            # Read the checksum file into memory
            checksum_file_contents = self._run_command('cat '
                                                       + checksum_path).stdout

            # Return {} if we have an empty checksum file present
            if not checksum_file_contents.strip():
                return {}

            # Parse the checksum file contents into self._checksum_dict
            for line in checksum_file_contents.splitlines():
                fields = line.split(None, 1)
                if len(fields) != 2:
                    # Blank or truncated line: nothing usable to record,
                    # and unpacking it would raise ValueError.
                    continue
                checksum, package_name = fields
                self._checksum_dict[package_name] = checksum

        return self._checksum_dict
				440
				441
    def _save_checksum_dict(self, checksum_dict):
        '''
        Replace the in memory checksum dictionary with a copy of
        checksum_dict and write the very same values to the checksum file,
        one '<checksum> <package name>' line per package.

        checksum_dict : New checksum dictionary
        '''
        checksum_path = self._get_checksum_file_path()
        self._checksum_dict = checksum_dict.copy()

        lines = [checksum + ' ' + pkg_name
                 for pkg_name, checksum in checksum_dict.items()]
        checksum_contents = '\n'.join(lines)

        # Write the checksum file back to disk
        save_cmd = 'echo "%s" > %s' % (checksum_contents, checksum_path)
        self._run_command(save_cmd)
				457
				458
				459	def compute_checksum(self, pkg_path):
				460	'''
				461	Compute the MD5 checksum for the package file and return it.
				462	pkg_path : The complete path for the package file
				463	'''
				464	md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
				465	return md5sum_output.split()[0]
				466
				467
    def update_checksum(self, pkg_path):
        '''
        Recompute the checksum of the package file and store it, under the
        file's base name, in the packages' checksum file. This method is
        called whenever a package is fetched just to be sure that the
        checksums in the local file are the latest.

        pkg_path : The complete path to the package file.
        '''
        fresh_checksum = self.compute_checksum(pkg_path)
        checksums = self._get_checksum_dict()
        pkg_file_name = os.path.basename(pkg_path)
        checksums[pkg_file_name] = fresh_checksum
        self._save_checksum_dict(checksums)
				480
				481
    def remove_checksum(self, pkg_name):
        '''
        Drop the package's entry (if there is one) from the packages
        checksum file. This method is called whenever a package is removed
        from the repositories in order to clean its corresponding checksum.

        pkg_name : The name of the package to be removed
        '''
        checksums = self._get_checksum_dict()
        # A missing entry is not an error, hence the default
        checksums.pop(pkg_name, None)
        self._save_checksum_dict(checksums)
				493
				494
    def compare_checksum(self, pkg_path, repo_url):
        '''
        Compare the checksum computed for the file at pkg_path with the one
        recorded for it in the checksum file.
        Return True if both match, False if they differ or if no checksum
        is recorded for the package.

        pkg_path : The full path to the package file for which the
                   checksum is being compared
        repo_url : The URL to fetch the checksum from (not used by this
                   implementation)
        '''
        recorded = self._get_checksum_dict()
        package_name = os.path.basename(pkg_path)
        if recorded and package_name in recorded:
            repository_checksum = recorded[package_name]
            return self.compute_checksum(pkg_path) == repository_checksum
        return False
				512
				513
    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_dirs=None):
        '''
        Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2
        in dest_dir out of the contents of src_dir.
        Excludes the directories specified in exclude_dirs while tarring
        the source. Returns the tarball path.

        pkg_name     : file name of the tarball to create
        src_dir      : directory whose contents get packaged
        dest_dir     : directory the tarball is created in
        exclude_dirs : optional list of directories (relative to src_dir)
                       whose contents are left out of the tarball
        '''
        tar_operands = ['--exclude=%s/*' % ex_dir
                        for ex_dir in (exclude_dirs or [])]
        # The '.' here is needed to zip the files in the current
        # directory. We use '-C' for tar to change to the required
        # directory i.e. src_dir and then zip up the files in that
        # directory(which is '.') excluding the ones in the exclude_dirs.
        # It is passed whether or not anything is excluded, since tar has
        # nothing to archive without a file operand.
        tar_operands.append('.')
        exclude_string = " ".join(tar_operands)

        tarball_path = os.path.join(dest_dir, pkg_name)

        utils.system("tar -cvjf %s -C %s %s "
                     % (tarball_path, src_dir, exclude_string))

        return tarball_path
				536
				537
				538	def untar_required(self, tarball_path, dest_dir):
				539	'''
				540	Compare the checksum of the tarball_path with the .checksum file
				541	in the dest_dir and return False if it matches. The untar
				542	of the package happens only if the checksums do not match.
				543	'''
				544	checksum_path = os.path.join(dest_dir, '.checksum')
				545	try:
				546	existing_checksum = self._run_command('cat ' + checksum_path).stdout
				547	except (error.CmdError, error.AutoservRunError):
				548	# If the .checksum file is not present (generally, this should
				549	# not be the case) then return True so that the untar happens
				550	return True
				551
				552	new_checksum = self.compute_checksum(tarball_path)
				553	return (new_checksum.strip() != existing_checksum.strip())
				554
				555
				556	def untar_pkg(self, tarball_path, dest_dir):
				557	'''
				558	Untar the package present in the tarball_path and put a
				559	".checksum" file in the dest_dir containing the checksum
				560	of the tarball. This method
				561	assumes that the package to be untarred is of the form
				562	<name>.tar.bz2
				563	'''
				564	self._run_command('tar xvjf %s -C %s' % (tarball_path, dest_dir))
				565	# Put the .checksum file in the install_dir to note
				566	# where the package came from
				567	pkg_checksum = self.compute_checksum(tarball_path)
				568	pkg_checksum_path = os.path.join(dest_dir,
				569	'.checksum')
				570	self._run_command('echo "%s" > %s '
				571	% (pkg_checksum, pkg_checksum_path))
				572
				573
    def get_tarball_name(self, name, pkg_type):
        '''
        Return the tarball file name of a package, which is of the form
        <pkg_type>-<name>.tar.bz2
        '''
        parts = {'pkg_type': pkg_type, 'name': name}
        return "%(pkg_type)s-%(name)s.tar.bz2" % parts
				576
				577
    def is_url(self, url):
        """Return true if path looks like a URL, i.e. starts with http://"""
        http_prefix = 'http://'
        return url.startswith(http_prefix)
				581
				582
				583	def get_package_name(self, url, pkg_type):
				584	'''
				585	Extract the group and test name for the url. This method is currently
				586	used only for tests.
				587	'''
				588	if pkg_type == 'test':
				589	regex = '([^:]+://(.)/([^/])$'
				590	return self._get_package_name(url, regex)
				591	else:
				592	return ('', url)
				593
				594
				595	def _get_package_name(self, url, regex):
				596	if not self.is_url(url):
				597	if url.endswith('.tar.bz2'):
				598	testname = url.replace('.tar.bz2', '')
				599	testname = re.sub(r'(\d*)\.', '', testname)
				600	return (testname, testname)
				601	else:
				602	return ('', url)
				603
				604	match = re.match(regex, url)
				605	if not match:
				606	return ('', url)
				607	group, filename = match.groups()
				608	# Generate the group prefix.
				609	group = re.sub(r'\W', '_', group)
				610	# Drop the extension to get the raw test name.
				611	testname = re.sub(r'\.tar\.bz2', '', filename)
				612	# Drop any random numbers at the end of the test name if any
				613	testname = re.sub(r'\.(\d*)', '', testname)
				614	return (group, testname)
				615
				616
				617	# site_packages.py may be non-existant or empty, make sure that an appropriate
				618	# SitePackage class is created nevertheless
				619	try:
				620	from site_packages import SitePackageManager
				621	except ImportError:
				622	class SitePackageManager(BasePackageManager):
				623	pass
				624
				625	class PackageManager(SitePackageManager):
				626	pass