#!/usr/bin/python

"""
This module defines the BasePackageManager class, which provides an
implementation of the packaging system API with methods to fetch,
upload and remove packages. Site-specific extensions to any of these
methods should inherit from this class.
"""
9
10import re, os, sys, traceback, subprocess, shutil, time, traceback, urlparse
11import fcntl
12from autotest_lib.client.common_lib import error, utils
13
14
class PackageUploadError(error.AutotestError):
    """Raised when there is an error uploading the package"""
    pass
17
class PackageFetchError(error.AutotestError):
    """Raised when there is an error fetching the package"""
    pass
20
class PackageRemoveError(error.AutotestError):
    """Raised when there is an error removing the package"""
    pass
23
class PackageInstallError(error.AutotestError):
    """Raised when there is an error installing the package"""
    pass
26
# Name of the file that records the checksum of every package. A copy lives
# in the package manager's working directory and is fetched from / uploaded
# to the repositories.
CHECKSUM_FILE = "packages.checksum"
29
class BasePackageManager(object):
    '''
    Base implementation of the packaging system API: fetching, installing,
    uploading and removing packages, with their checksums tracked in a
    checksum file.
    '''
    # Shared by all instances. Maps an HTTP server name to REPO_OK once the
    # server answered the reachability test, or to the exception raised by
    # that test when it failed.
    _repo_exception = {}
    # Sentinel stored in _repo_exception for a server known to be reachable.
    REPO_OK = object()
33
34 def __init__(self, pkgmgr_dir, repo_urls=None, upload_paths=None,
35 do_locking=True, run_function=utils.run, run_function_args=[],
36 run_function_dargs={}):
37 '''
38 repo_urls: The list of the repository urls which is consulted
39 whilst fetching the package
40 upload_paths: The list of the upload of repositories to which
41 the package is uploaded to
42 pkgmgr_dir : A directory that can be used by the package manager
43 to dump stuff (like checksum files of the repositories
44 etc.).
45 do_locking : Enable locking when the packages are installed.
46
47 run_function is used to execute the commands throughout this file.
48 It defaults to utils.run() but a custom method (if provided) should
49 be of the same schema as utils.run. It should return a CmdResult
50 object and throw a CmdError exception. The reason for using a separate
51 function to run the commands is that the same code can be run to fetch
52 a package on the local machine or on a remote machine (in which case
53 ssh_host's run function is passed in for run_function).
54 '''
55 # In memory dictionary that stores the checksum's of packages
56 self._checksum_dict = {}
57
58 self.pkgmgr_dir = pkgmgr_dir
59 self.do_locking = do_locking
60
61 # Process the repository URLs and the upload paths if specified
62 if not repo_urls:
63 self.repo_urls = []
64 else:
65 self.repo_urls = list(repo_urls)
66 if not upload_paths:
67 self.upload_paths = []
68 else:
69 self.upload_paths = list(upload_paths)
70
71 # Create an internal function that is a simple wrapper of
72 # run_function and takes in the args and dargs as arguments
73 def _run_command(command, _run_command_args=run_function_args,
74 _run_command_dargs={}):
75 '''
76 Special internal function that takes in a command as
77 argument and passes it on to run_function (if specified).
78 The _run_command_dargs are merged into run_function_dargs
79 with the former having more precedence than the latter.
80 '''
81 new_dargs = dict(run_function_dargs)
82 new_dargs.update(_run_command_dargs)
83
84 return run_function(command, *_run_command_args,
85 **new_dargs)
86
87 self._run_command = _run_command
88
89
90 def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
91 preserve_install_dir=False, repo_url=None):
92 '''
93 Remove install_dir if it already exists and then recreate it unless
94 preserve_install_dir is specified as True.
95 Fetch the package into the pkg_dir. Untar the package into install_dir
96 The assumption is that packages are of the form :
97 <pkg_type>.<pkg_name>.tar.bz2
98 name : name of the package
99 type : type of the package
100 fetch_dir : The directory into which the package tarball will be
101 fetched to.
102 install_dir : the directory where the package files will be untarred to
103 repo_url : the url of the repository to fetch the package from.
104 '''
105
106 # do_locking flag is on by default unless you disable it (typically
107 # in the cases where packages are directly installed from the server
108 # onto the client in which case fcntl stuff wont work as the code
109 # will run on the server in that case..
110 if self.do_locking:
111 lockfile_name = '.%s-%s-lock' % (name, pkg_type)
112 lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
113
114 try:
115 if self.do_locking:
116 fcntl.flock(lockfile, fcntl.LOCK_EX)
117
118 self._run_command('mkdir -p %s' % fetch_dir)
119
120 pkg_name = self.get_tarball_name(name, pkg_type)
121 fetch_path = os.path.join(fetch_dir, pkg_name)
122 try:
123 # Fetch the package into fetch_dir
124 self.fetch_pkg(pkg_name, fetch_path)
125
126 # check to see if the install_dir exists and if it does
127 # then check to see if the .checksum file is the latest
128 install_dir_exists = False
129 try:
130 self._run_command("ls %s" % install_dir)
131 install_dir_exists = True
132 except (error.CmdError, error.AutoservRunError):
133 pass
134
135 if (install_dir_exists and
136 not self.untar_required(fetch_path, install_dir)):
137 return
138
139 # untar the package into install_dir and
140 # update the checksum in that directory
141 if not preserve_install_dir:
142 # Make sure we clean up the install_dir
143 self._run_command('rm -rf %s' % install_dir)
144 self._run_command('mkdir -p %s' % install_dir)
145
146 self.untar_pkg(fetch_path, install_dir)
147
148 except PackageFetchError, why:
149 raise PackageInstallError('Installation of %s(type:%s) failed'
150 ' : %s' % (name, pkg_type, why))
151 finally:
152 if self.do_locking:
153 fcntl.flock(lockfile, fcntl.LOCK_UN)
154 lockfile.close()
155
156
157 def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=True):
158 '''
159 Fetch the package into dest_dir from repo_url. By default repo_url
160 is None and the package is looked in all the repostories specified.
161 Otherwise it fetches it from the specific repo_url.
162 pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
163 dep-gcc.tar.bz2, kernel.1-1.rpm)
164 repo_url : the URL of the repository where the package is located.
165 dest_path : complete path of where the package will be fetched to.
166 use_checksum : This is set to False to fetch the packages.checksum file
167 so that the checksum comparison is bypassed for the
168 checksum file itself. This is used internally by the
169 packaging system. It should be ignored by externals
170 callers of this method who use it fetch custom packages.
171 '''
172
173 try:
174 self._run_command("ls %s" % os.path.dirname(dest_path))
175 except (error.CmdError, error.AutoservRunError):
176 raise PackageFetchError("Please provide a valid "
177 "destination: %s " % dest_path)
178
179 # See if the package was already fetched earlier, if so
180 # the checksums need to be compared and the package is now
181 # fetched only if they differ.
182 pkg_exists = False
183 try:
184 self._run_command("ls %s" % dest_path)
185 pkg_exists = True
186 except (error.CmdError, error.AutoservRunError):
187 pass
188
189 # if a repository location is explicitly provided, fetch the package
190 # from there and return
191 if repo_url:
192 repo_url_list = [repo_url]
193 elif len(self.repo_urls) > 0:
194 repo_url_list = self.repo_urls
195 else:
196 raise PackageFetchError("There are no repository urls specified")
197
198 error_msgs = {}
199 for location in repo_url_list:
200 try:
201 # Fetch the checksum if it not there
202 if not use_checksum:
203 self.fetch_pkg_file(pkg_name, dest_path, location)
204
205 # Fetch the package if a) the pkg does not exist or
206 # b) if the checksum differs for the existing package
207 elif (not pkg_exists or
208 not self.compare_checksum(dest_path, location)):
209 self.fetch_pkg_file(pkg_name, dest_path, location)
210 # Update the checksum of the package in the packages'
211 # checksum file
212 self.update_checksum(dest_path)
213 return
214 except (PackageFetchError, error.AutoservRunError), e:
215 # The package could not be found in this repo, continue looking
216 error_msgs[location] = str(e)
217 print >> sys.stderr, ('Package - could not be fetched from '
218 '- %s : %s' % (location, e))
219
220 # if we got here then that means the package is not found
221 # in any of the repositories.
222 raise PackageFetchError("Package could not be fetched from any of"
223 " the repos %s : %s " % (repo_url_list,
224 error_msgs))
225
226
227 def fetch_pkg_file(self, file_name, dest_path, source_url):
228 """
229 Fetch the file from source_url into dest_path. The package repository
230 url is parsed and the appropriate retrieval method is determined.
231
232 """
233 if source_url.startswith('http://'):
234 self.fetch_file_http(file_name, dest_path, source_url)
235 else:
236 raise PackageFetchError("Invalid location specified")
237
238
239 def fetch_file_http(self, file_name, dest_path, source_url):
240 """
241 Fetch the package using http protocol. Raises a PackageFetchError.
242 """
243 # check to see if the source_url is reachable or not
244 self.run_http_test(source_url, os.path.dirname(dest_path))
245
246 pkg_path = os.path.join(source_url, file_name)
247 try:
248 self._run_command('wget %s -O %s' % (pkg_path, dest_path))
249 except error.CmdError, e:
250 raise PackageFetchError("Package - %s not found in %s: %s"
251 % (file_name, source_url, e))
252
253
254 def run_http_test(self, source_url, dest_dir):
255 '''
256 Run a simple 30 sec wget on source_url
257 just to see if it can be reachable or not. This avoids the need
258 for waiting for a 10min timeout.
259 '''
260 dest_file_path = os.path.join(dest_dir, 'http_test_file')
261
262 BPM = BasePackageManager
263 error_msg = "HTTP test failed. Failed to contact"
264 # We should never get here unless the source_url starts with http://
265 assert(source_url.startswith('http://'))
266
267 # Get the http server name from the URL
268 server_name = urlparse.urlparse(source_url)[1]
269 http_cmd = 'printf "GET / HTTP/1.0\n\n" | nc %s 80' % server_name
270
271 if server_name in BPM._repo_exception:
272 if BPM._repo_exception[server_name] == BPM.REPO_OK:
273 # This repository is fine. Simply return
274 return
275 else:
276 raise PackageFetchError("%s - %s : %s "
277 % (error_msg, server_name,
278 BPM._repo_exception[server_name]))
279 try:
280 try:
281 self._run_command(http_cmd,
282 _run_command_dargs={'timeout':30})
283 BPM._repo_exception[server_name] = BPM.REPO_OK
284 finally:
285 self._run_command('rm -f %s' % dest_file_path)
286 except error.CmdError, e:
287 BPM._repo_exception[server_name] = e
288 raise PackageFetchError("%s - %s: %s " % (error_msg,
289 server_name, e))
290
291
292
    # TODO(aganti): Fix the bug with the current checksum logic where
    # packages' checksums that are not present consistently in all the
    # repositories are not handled properly. This is a corner case, but
    # the ideal solution is to make the checksum file repository specific
    # and then maintain it.
298 def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False):
299 '''
300 Uploads to a specified upload_path or to all the repos.
301 Also uploads the checksum file to all the repos.
302 pkg_path : The complete path to the package file
303 upload_path : the absolute path where the files are copied to.
304 if set to 'None' assumes 'all' repos
305 update_checksum : If set to False, the checksum file is not
306 going to be updated which happens by default.
307 This is necessary for custom
308 packages (like custom kernels and custom tests)
309 that get uploaded which do not need to be part of
310 the checksum file and bloat it.
311 '''
312 if update_checksum:
313 # get the packages' checksum file and update it with the current
314 # package's checksum
315 checksum_path = self._get_checksum_file_path()
316 self.update_checksum(pkg_path)
317
318 if upload_path:
319 upload_path_list = [upload_path]
320 elif len(self.upload_paths) > 0:
321 upload_path_list = self.upload_paths
322 else:
323 raise PackageUploadError("Invalid Upload Path specified")
324
325 # upload the package
326 for path in upload_path_list:
327 self.upload_pkg_file(pkg_path, path)
328 if update_checksum:
329 self.upload_pkg_file(checksum_path, path)
330
331
332 def upload_pkg_file(self, file_path, upload_path):
333 '''
334 Upload a single file. Depending on the upload path, the appropriate
335 method for that protocol is called. Currently this simply copies the
336 file to the target directory (but can be extended for other protocols)
337 This assumes that the web server is running on the same machine where
338 the method is being called from. The upload_path's files are
339 basically served by that web server.
340 '''
341 try:
342 shutil.copy(file_path, upload_path)
343 os.chmod(os.path.join(upload_path,
344 os.path.basename(file_path)), 0755)
345 except (IOError, os.error), why:
346 raise PackageUploadError("Upload of %s to %s failed: %s"
347 % (file_path, upload_path, why))
348
349
mbligh9fc77972008-10-02 20:32:09 +0000350 def upload_pkg_dir(self, dir_path, upload_path):
351 '''
352 Upload a full directory. Depending on the upload path, the appropriate
353 method for that protocol is called. Currently this copies the whole
354 tmp package directory to the target directory.
355 This assumes that the web server is running on the same machine where
356 the method is being called from. The upload_path's files are
357 basically served by that web server.
358 '''
359 try:
360 utils.run("cp %s/* %s " % (dir_path, upload_path))
361 utils.run("chmod 644 %s/*" % upload_path)
362 except (IOError, os.error), why:
363 raise PackageUploadError("Upload of %s to %s failed: %s"
364 % (dir_path, upload_path, why))
365
366
showard9d02fb52008-08-08 18:20:37 +0000367 def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
368 '''
369 Remove the package from the specified remove_path
370 pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
371 dep-gcc.tar.bz2)
372 remove_path : the location to remove the package from.
373
374 '''
375 if remove_path:
376 remove_path_list = [remove_path]
377 elif len(self.upload_paths) > 0:
378 remove_path_list = self.upload_paths
379 else:
380 raise PackageRemoveError("Invalid path to remove the pkg from")
381
382 checksum_path = self._get_checksum_file_path()
383
384 if remove_checksum:
385 self.remove_checksum(pkg_name)
386
387 # remove the package and upload the checksum file to the repos
388 for path in remove_path_list:
389 self.remove_pkg_file(pkg_name, path)
390 self.upload_pkg_file(checksum_path, path)
391
392
393 def remove_pkg_file(self, file_name, pkg_dir):
394 '''
395 Remove the file named file_name from pkg_dir
396 '''
397 try:
398 # Remove the file
399 os.remove(os.path.join(pkg_dir, file_name))
400 except (IOError, os.error), why:
401 raise PackageRemoveError("Could not remove %s from %s: %s "
402 % (file_name, pkg_dir, why))
403
404
405 def _get_checksum_file_path(self):
406 '''
407 Return the complete path of the checksum file (assumed to be stored
408 in self.pkgmgr_dir
409 '''
410 return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
411
412
413 def _get_checksum_dict(self):
414 '''
415 Fetch the checksum file if not already fetched. If the checksum file
416 cannot be fetched from the repos then a new file is created with
417 the current package's (specified in pkg_path) checksum value in it.
418 Populate the local checksum dictionary with the values read from
419 the checksum file.
420 The checksum file is assumed to be present in self.pkgmgr_dir
421 '''
422 checksum_path = self._get_checksum_file_path()
423 if not self._checksum_dict:
424 # Fetch the checksum file
425 try:
426 try:
427 self._run_command("ls %s" % checksum_path)
428 except (error.CmdError, error.AutoservRunError):
429 # The packages checksum file does not exist locally.
430 # See if it is present in the repositories.
431 self.fetch_pkg(CHECKSUM_FILE, checksum_path,
432 use_checksum=False)
433 except PackageFetchError, e:
434 # This should not happen whilst fetching a package..if a
435 # package is present in the repository, the corresponding
436 # checksum file should also be automatically present. This
437 # case happens only when a package
438 # is being uploaded and if it is the first package to be
439 # uploaded to the repos (hence no checksum file created yet)
440 # Return an empty dictionary in that case
441 return {}
442
443 # Read the checksum file into memory
444 checksum_file_contents = self._run_command('cat '
445 + checksum_path).stdout
446
447 # Return {} if we have an empty checksum file present
448 if not checksum_file_contents.strip():
449 return {}
450
451 # Parse the checksum file contents into self._checksum_dict
452 for line in checksum_file_contents.splitlines():
453 checksum, package_name = line.split(None, 1)
454 self._checksum_dict[package_name] = checksum
455
456 return self._checksum_dict
457
458
459 def _save_checksum_dict(self, checksum_dict):
460 '''
461 Save the checksum dictionary onto the checksum file. Update the
462 local _checksum_dict variable with this new set of values.
463 checksum_dict : New checksum dictionary
464 checksum_dir : The directory in which to store the checksum file to.
465 '''
466 checksum_path = self._get_checksum_file_path()
467 self._checksum_dict = checksum_dict.copy()
468 checksum_contents = '\n'.join(checksum + ' ' + pkg_name
469 for pkg_name,checksum in
470 checksum_dict.iteritems())
471 # Write the checksum file back to disk
472 self._run_command('echo "%s" > %s' % (checksum_contents,
473 checksum_path))
474
475
476 def compute_checksum(self, pkg_path):
477 '''
478 Compute the MD5 checksum for the package file and return it.
479 pkg_path : The complete path for the package file
480 '''
481 md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
482 return md5sum_output.split()[0]
483
484
485 def update_checksum(self, pkg_path):
486 '''
487 Update the checksum of the package in the packages' checksum
488 file. This method is called whenever a package is fetched just
489 to be sure that the checksums in the local file are the latest.
490 pkg_path : The complete path to the package file.
491 '''
492 # Compute the new checksum
493 new_checksum = self.compute_checksum(pkg_path)
494 checksum_dict = self._get_checksum_dict()
495 checksum_dict[os.path.basename(pkg_path)] = new_checksum
496 self._save_checksum_dict(checksum_dict)
497
498
499 def remove_checksum(self, pkg_name):
500 '''
501 Remove the checksum of the package from the packages checksum file.
502 This method is called whenever a package is removed from the
503 repositories in order clean its corresponding checksum.
504 pkg_name : The name of the package to be removed
505 '''
506 checksum_dict = self._get_checksum_dict()
507 if pkg_name in checksum_dict:
508 del checksum_dict[pkg_name]
509 self._save_checksum_dict(checksum_dict)
510
511
512 def compare_checksum(self, pkg_path, repo_url):
513 '''
514 Calculate the checksum of the file specified in pkg_path and
515 compare it with the checksum in the checksum file
516 Return True if both match else return False.
517 pkg_path : The full path to the package file for which the
518 checksum is being compared
519 repo_url : The URL to fetch the checksum from
520 '''
521 checksum_dict = self._get_checksum_dict()
522 package_name = os.path.basename(pkg_path)
523 if not checksum_dict or package_name not in checksum_dict:
524 return False
525
526 repository_checksum = checksum_dict[package_name]
527 local_checksum = self.compute_checksum(pkg_path)
528 return (local_checksum == repository_checksum)
529
530
mblighdbfc4e32008-08-22 18:08:07 +0000531 def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
showard9d02fb52008-08-08 18:20:37 +0000532 '''
533 Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
mbligh9fc77972008-10-02 20:32:09 +0000534 Excludes the directories specified in exclude_string while tarring
showard9d02fb52008-08-08 18:20:37 +0000535 the source. Returns the tarball path.
536 '''
showard9d02fb52008-08-08 18:20:37 +0000537 tarball_path = os.path.join(dest_dir, pkg_name)
mbligh9fc77972008-10-02 20:32:09 +0000538 cmd = "tar -cvjf %s -C %s %s " % (tarball_path, src_dir, exclude_string)
showard9d02fb52008-08-08 18:20:37 +0000539
mbligh9fc77972008-10-02 20:32:09 +0000540 utils.system(cmd)
showard9d02fb52008-08-08 18:20:37 +0000541 return tarball_path
542
543
544 def untar_required(self, tarball_path, dest_dir):
545 '''
546 Compare the checksum of the tarball_path with the .checksum file
547 in the dest_dir and return False if it matches. The untar
548 of the package happens only if the checksums do not match.
549 '''
550 checksum_path = os.path.join(dest_dir, '.checksum')
551 try:
552 existing_checksum = self._run_command('cat ' + checksum_path).stdout
553 except (error.CmdError, error.AutoservRunError):
554 # If the .checksum file is not present (generally, this should
555 # not be the case) then return True so that the untar happens
556 return True
557
558 new_checksum = self.compute_checksum(tarball_path)
559 return (new_checksum.strip() != existing_checksum.strip())
560
561
562 def untar_pkg(self, tarball_path, dest_dir):
563 '''
564 Untar the package present in the tarball_path and put a
565 ".checksum" file in the dest_dir containing the checksum
566 of the tarball. This method
567 assumes that the package to be untarred is of the form
568 <name>.tar.bz2
569 '''
570 self._run_command('tar xvjf %s -C %s' % (tarball_path, dest_dir))
571 # Put the .checksum file in the install_dir to note
572 # where the package came from
573 pkg_checksum = self.compute_checksum(tarball_path)
574 pkg_checksum_path = os.path.join(dest_dir,
575 '.checksum')
576 self._run_command('echo "%s" > %s '
577 % (pkg_checksum, pkg_checksum_path))
578
579
580 def get_tarball_name(self, name, pkg_type):
581 return "%s-%s.tar.bz2" % (pkg_type, name)
582
583
584 def is_url(self, url):
585 """Return true if path looks like a URL"""
586 return url.startswith('http://')
587
588
589 def get_package_name(self, url, pkg_type):
590 '''
591 Extract the group and test name for the url. This method is currently
592 used only for tests.
593 '''
594 if pkg_type == 'test':
595 regex = '([^:]+://(.*)/([^/]*)$'
596 return self._get_package_name(url, regex)
597 else:
598 return ('', url)
599
600
601 def _get_package_name(self, url, regex):
602 if not self.is_url(url):
603 if url.endswith('.tar.bz2'):
604 testname = url.replace('.tar.bz2', '')
605 testname = re.sub(r'(\d*)\.', '', testname)
606 return (testname, testname)
607 else:
608 return ('', url)
609
610 match = re.match(regex, url)
611 if not match:
612 return ('', url)
613 group, filename = match.groups()
614 # Generate the group prefix.
615 group = re.sub(r'\W', '_', group)
616 # Drop the extension to get the raw test name.
617 testname = re.sub(r'\.tar\.bz2', '', filename)
618 # Drop any random numbers at the end of the test name if any
619 testname = re.sub(r'\.(\d*)', '', testname)
620 return (group, testname)
621
622
# site_packages.py may be missing or empty. Either way the import fails, and
# a SitePackageManager that adds nothing to BasePackageManager is defined in
# its place.
try:
    from site_packages import SitePackageManager
except ImportError:
    class SitePackageManager(BasePackageManager):
        pass
630
class PackageManager(SitePackageManager):
    '''
    The package manager built on SitePackageManager, i.e. on the site
    specific extensions when site_packages provides them.
    '''
    pass