blob: d76b3165039db11eedbf1403f3154250bb259b6f [file] [log] [blame]
showard9d02fb52008-08-08 18:20:37 +00001#!/usr/bin/python
2
3"""
4This module defines the BasePackageManager Class which provides an
5implementation of the packaging system API providing methods to fetch,
6upload and remove packages. Site specific extensions to any of these methods
7should inherit this class.
8"""
9
10import re, os, sys, traceback, subprocess, shutil, time, traceback, urlparse
11import fcntl
12from autotest_lib.client.common_lib import error, utils
13
14
class PackageUploadError(error.AutotestError):
    """Raised when there is an error uploading the package."""
17
class PackageFetchError(error.AutotestError):
    """Raised when there is an error fetching the package."""
20
class PackageRemoveError(error.AutotestError):
    """Raised when there is an error removing the package."""
23
class PackageInstallError(error.AutotestError):
    """Raised when there is an error installing the package."""
26
# File name of the checksum file that stores the packages' checksums.
CHECKSUM_FILE = 'packages.checksum'
29
30class BasePackageManager(object):
31 _repo_exception = {}
32 REPO_OK = object()
33
34 def __init__(self, pkgmgr_dir, repo_urls=None, upload_paths=None,
35 do_locking=True, run_function=utils.run, run_function_args=[],
36 run_function_dargs={}):
37 '''
38 repo_urls: The list of the repository urls which is consulted
39 whilst fetching the package
40 upload_paths: The list of the upload of repositories to which
41 the package is uploaded to
42 pkgmgr_dir : A directory that can be used by the package manager
43 to dump stuff (like checksum files of the repositories
44 etc.).
45 do_locking : Enable locking when the packages are installed.
46
47 run_function is used to execute the commands throughout this file.
48 It defaults to utils.run() but a custom method (if provided) should
49 be of the same schema as utils.run. It should return a CmdResult
50 object and throw a CmdError exception. The reason for using a separate
51 function to run the commands is that the same code can be run to fetch
52 a package on the local machine or on a remote machine (in which case
53 ssh_host's run function is passed in for run_function).
54 '''
55 # In memory dictionary that stores the checksum's of packages
56 self._checksum_dict = {}
57
58 self.pkgmgr_dir = pkgmgr_dir
59 self.do_locking = do_locking
60
61 # Process the repository URLs and the upload paths if specified
62 if not repo_urls:
63 self.repo_urls = []
64 else:
65 self.repo_urls = list(repo_urls)
66 if not upload_paths:
67 self.upload_paths = []
68 else:
69 self.upload_paths = list(upload_paths)
70
71 # Create an internal function that is a simple wrapper of
72 # run_function and takes in the args and dargs as arguments
73 def _run_command(command, _run_command_args=run_function_args,
74 _run_command_dargs={}):
75 '''
76 Special internal function that takes in a command as
77 argument and passes it on to run_function (if specified).
78 The _run_command_dargs are merged into run_function_dargs
79 with the former having more precedence than the latter.
80 '''
81 new_dargs = dict(run_function_dargs)
82 new_dargs.update(_run_command_dargs)
83
84 return run_function(command, *_run_command_args,
85 **new_dargs)
86
87 self._run_command = _run_command
88
89
90 def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
91 preserve_install_dir=False, repo_url=None):
92 '''
93 Remove install_dir if it already exists and then recreate it unless
94 preserve_install_dir is specified as True.
95 Fetch the package into the pkg_dir. Untar the package into install_dir
96 The assumption is that packages are of the form :
97 <pkg_type>.<pkg_name>.tar.bz2
98 name : name of the package
99 type : type of the package
100 fetch_dir : The directory into which the package tarball will be
101 fetched to.
102 install_dir : the directory where the package files will be untarred to
103 repo_url : the url of the repository to fetch the package from.
104 '''
105
106 # do_locking flag is on by default unless you disable it (typically
107 # in the cases where packages are directly installed from the server
108 # onto the client in which case fcntl stuff wont work as the code
109 # will run on the server in that case..
110 if self.do_locking:
111 lockfile_name = '.%s-%s-lock' % (name, pkg_type)
112 lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
113
114 try:
115 if self.do_locking:
116 fcntl.flock(lockfile, fcntl.LOCK_EX)
117
118 self._run_command('mkdir -p %s' % fetch_dir)
119
120 pkg_name = self.get_tarball_name(name, pkg_type)
121 fetch_path = os.path.join(fetch_dir, pkg_name)
122 try:
123 # Fetch the package into fetch_dir
124 self.fetch_pkg(pkg_name, fetch_path)
125
126 # check to see if the install_dir exists and if it does
127 # then check to see if the .checksum file is the latest
128 install_dir_exists = False
129 try:
130 self._run_command("ls %s" % install_dir)
131 install_dir_exists = True
132 except (error.CmdError, error.AutoservRunError):
133 pass
134
135 if (install_dir_exists and
136 not self.untar_required(fetch_path, install_dir)):
137 return
138
139 # untar the package into install_dir and
140 # update the checksum in that directory
141 if not preserve_install_dir:
142 # Make sure we clean up the install_dir
143 self._run_command('rm -rf %s' % install_dir)
144 self._run_command('mkdir -p %s' % install_dir)
145
146 self.untar_pkg(fetch_path, install_dir)
147
148 except PackageFetchError, why:
149 raise PackageInstallError('Installation of %s(type:%s) failed'
150 ' : %s' % (name, pkg_type, why))
151 finally:
152 if self.do_locking:
153 fcntl.flock(lockfile, fcntl.LOCK_UN)
154 lockfile.close()
155
156
157 def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=True):
158 '''
159 Fetch the package into dest_dir from repo_url. By default repo_url
160 is None and the package is looked in all the repostories specified.
161 Otherwise it fetches it from the specific repo_url.
162 pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
163 dep-gcc.tar.bz2, kernel.1-1.rpm)
164 repo_url : the URL of the repository where the package is located.
165 dest_path : complete path of where the package will be fetched to.
166 use_checksum : This is set to False to fetch the packages.checksum file
167 so that the checksum comparison is bypassed for the
168 checksum file itself. This is used internally by the
169 packaging system. It should be ignored by externals
170 callers of this method who use it fetch custom packages.
171 '''
172
173 try:
174 self._run_command("ls %s" % os.path.dirname(dest_path))
175 except (error.CmdError, error.AutoservRunError):
176 raise PackageFetchError("Please provide a valid "
177 "destination: %s " % dest_path)
178
179 # See if the package was already fetched earlier, if so
180 # the checksums need to be compared and the package is now
181 # fetched only if they differ.
182 pkg_exists = False
183 try:
184 self._run_command("ls %s" % dest_path)
185 pkg_exists = True
186 except (error.CmdError, error.AutoservRunError):
187 pass
188
189 # if a repository location is explicitly provided, fetch the package
190 # from there and return
191 if repo_url:
192 repo_url_list = [repo_url]
193 elif len(self.repo_urls) > 0:
194 repo_url_list = self.repo_urls
195 else:
196 raise PackageFetchError("There are no repository urls specified")
197
198 error_msgs = {}
199 for location in repo_url_list:
200 try:
201 # Fetch the checksum if it not there
202 if not use_checksum:
203 self.fetch_pkg_file(pkg_name, dest_path, location)
204
205 # Fetch the package if a) the pkg does not exist or
206 # b) if the checksum differs for the existing package
207 elif (not pkg_exists or
208 not self.compare_checksum(dest_path, location)):
209 self.fetch_pkg_file(pkg_name, dest_path, location)
210 # Update the checksum of the package in the packages'
211 # checksum file
212 self.update_checksum(dest_path)
213 return
214 except (PackageFetchError, error.AutoservRunError), e:
215 # The package could not be found in this repo, continue looking
216 error_msgs[location] = str(e)
217 print >> sys.stderr, ('Package - could not be fetched from '
218 '- %s : %s' % (location, e))
219
220 # if we got here then that means the package is not found
221 # in any of the repositories.
222 raise PackageFetchError("Package could not be fetched from any of"
223 " the repos %s : %s " % (repo_url_list,
224 error_msgs))
225
226
227 def fetch_pkg_file(self, file_name, dest_path, source_url):
228 """
229 Fetch the file from source_url into dest_path. The package repository
230 url is parsed and the appropriate retrieval method is determined.
231
232 """
233 if source_url.startswith('http://'):
234 self.fetch_file_http(file_name, dest_path, source_url)
235 else:
236 raise PackageFetchError("Invalid location specified")
237
238
239 def fetch_file_http(self, file_name, dest_path, source_url):
240 """
241 Fetch the package using http protocol. Raises a PackageFetchError.
242 """
243 # check to see if the source_url is reachable or not
244 self.run_http_test(source_url, os.path.dirname(dest_path))
245
246 pkg_path = os.path.join(source_url, file_name)
247 try:
248 self._run_command('wget %s -O %s' % (pkg_path, dest_path))
249 except error.CmdError, e:
250 raise PackageFetchError("Package - %s not found in %s: %s"
251 % (file_name, source_url, e))
252
253
254 def run_http_test(self, source_url, dest_dir):
255 '''
256 Run a simple 30 sec wget on source_url
257 just to see if it can be reachable or not. This avoids the need
258 for waiting for a 10min timeout.
259 '''
260 dest_file_path = os.path.join(dest_dir, 'http_test_file')
261
262 BPM = BasePackageManager
263 error_msg = "HTTP test failed. Failed to contact"
264 # We should never get here unless the source_url starts with http://
265 assert(source_url.startswith('http://'))
266
267 # Get the http server name from the URL
268 server_name = urlparse.urlparse(source_url)[1]
269 http_cmd = 'printf "GET / HTTP/1.0\n\n" | nc %s 80' % server_name
270
271 if server_name in BPM._repo_exception:
272 if BPM._repo_exception[server_name] == BPM.REPO_OK:
273 # This repository is fine. Simply return
274 return
275 else:
276 raise PackageFetchError("%s - %s : %s "
277 % (error_msg, server_name,
278 BPM._repo_exception[server_name]))
279 try:
280 try:
281 self._run_command(http_cmd,
282 _run_command_dargs={'timeout':30})
283 BPM._repo_exception[server_name] = BPM.REPO_OK
284 finally:
285 self._run_command('rm -f %s' % dest_file_path)
286 except error.CmdError, e:
287 BPM._repo_exception[server_name] = e
288 raise PackageFetchError("%s - %s: %s " % (error_msg,
289 server_name, e))
290
291
292
293 # TODO(aganti): Fix the bug with the current checksum logic where
294 # packages' checksums that are not present consistently in all the
295 # repositories are not handled properly. This is a corner case though
296 # but the ideal solution is to make the checksum file repository specific
297 # and then maintain it.
298 def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False):
299 '''
300 Uploads to a specified upload_path or to all the repos.
301 Also uploads the checksum file to all the repos.
302 pkg_path : The complete path to the package file
303 upload_path : the absolute path where the files are copied to.
304 if set to 'None' assumes 'all' repos
305 update_checksum : If set to False, the checksum file is not
306 going to be updated which happens by default.
307 This is necessary for custom
308 packages (like custom kernels and custom tests)
309 that get uploaded which do not need to be part of
310 the checksum file and bloat it.
311 '''
312 if update_checksum:
313 # get the packages' checksum file and update it with the current
314 # package's checksum
315 checksum_path = self._get_checksum_file_path()
316 self.update_checksum(pkg_path)
317
318 if upload_path:
319 upload_path_list = [upload_path]
320 elif len(self.upload_paths) > 0:
321 upload_path_list = self.upload_paths
322 else:
323 raise PackageUploadError("Invalid Upload Path specified")
324
325 # upload the package
326 for path in upload_path_list:
327 self.upload_pkg_file(pkg_path, path)
328 if update_checksum:
329 self.upload_pkg_file(checksum_path, path)
330
331
332 def upload_pkg_file(self, file_path, upload_path):
333 '''
334 Upload a single file. Depending on the upload path, the appropriate
335 method for that protocol is called. Currently this simply copies the
336 file to the target directory (but can be extended for other protocols)
337 This assumes that the web server is running on the same machine where
338 the method is being called from. The upload_path's files are
339 basically served by that web server.
340 '''
341 try:
342 shutil.copy(file_path, upload_path)
343 os.chmod(os.path.join(upload_path,
344 os.path.basename(file_path)), 0755)
345 except (IOError, os.error), why:
346 raise PackageUploadError("Upload of %s to %s failed: %s"
347 % (file_path, upload_path, why))
348
349
350 def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
351 '''
352 Remove the package from the specified remove_path
353 pkg_name : name of the package (ex: test-sleeptest.tar.bz2,
354 dep-gcc.tar.bz2)
355 remove_path : the location to remove the package from.
356
357 '''
358 if remove_path:
359 remove_path_list = [remove_path]
360 elif len(self.upload_paths) > 0:
361 remove_path_list = self.upload_paths
362 else:
363 raise PackageRemoveError("Invalid path to remove the pkg from")
364
365 checksum_path = self._get_checksum_file_path()
366
367 if remove_checksum:
368 self.remove_checksum(pkg_name)
369
370 # remove the package and upload the checksum file to the repos
371 for path in remove_path_list:
372 self.remove_pkg_file(pkg_name, path)
373 self.upload_pkg_file(checksum_path, path)
374
375
376 def remove_pkg_file(self, file_name, pkg_dir):
377 '''
378 Remove the file named file_name from pkg_dir
379 '''
380 try:
381 # Remove the file
382 os.remove(os.path.join(pkg_dir, file_name))
383 except (IOError, os.error), why:
384 raise PackageRemoveError("Could not remove %s from %s: %s "
385 % (file_name, pkg_dir, why))
386
387
388 def _get_checksum_file_path(self):
389 '''
390 Return the complete path of the checksum file (assumed to be stored
391 in self.pkgmgr_dir
392 '''
393 return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
394
395
396 def _get_checksum_dict(self):
397 '''
398 Fetch the checksum file if not already fetched. If the checksum file
399 cannot be fetched from the repos then a new file is created with
400 the current package's (specified in pkg_path) checksum value in it.
401 Populate the local checksum dictionary with the values read from
402 the checksum file.
403 The checksum file is assumed to be present in self.pkgmgr_dir
404 '''
405 checksum_path = self._get_checksum_file_path()
406 if not self._checksum_dict:
407 # Fetch the checksum file
408 try:
409 try:
410 self._run_command("ls %s" % checksum_path)
411 except (error.CmdError, error.AutoservRunError):
412 # The packages checksum file does not exist locally.
413 # See if it is present in the repositories.
414 self.fetch_pkg(CHECKSUM_FILE, checksum_path,
415 use_checksum=False)
416 except PackageFetchError, e:
417 # This should not happen whilst fetching a package..if a
418 # package is present in the repository, the corresponding
419 # checksum file should also be automatically present. This
420 # case happens only when a package
421 # is being uploaded and if it is the first package to be
422 # uploaded to the repos (hence no checksum file created yet)
423 # Return an empty dictionary in that case
424 return {}
425
426 # Read the checksum file into memory
427 checksum_file_contents = self._run_command('cat '
428 + checksum_path).stdout
429
430 # Return {} if we have an empty checksum file present
431 if not checksum_file_contents.strip():
432 return {}
433
434 # Parse the checksum file contents into self._checksum_dict
435 for line in checksum_file_contents.splitlines():
436 checksum, package_name = line.split(None, 1)
437 self._checksum_dict[package_name] = checksum
438
439 return self._checksum_dict
440
441
442 def _save_checksum_dict(self, checksum_dict):
443 '''
444 Save the checksum dictionary onto the checksum file. Update the
445 local _checksum_dict variable with this new set of values.
446 checksum_dict : New checksum dictionary
447 checksum_dir : The directory in which to store the checksum file to.
448 '''
449 checksum_path = self._get_checksum_file_path()
450 self._checksum_dict = checksum_dict.copy()
451 checksum_contents = '\n'.join(checksum + ' ' + pkg_name
452 for pkg_name,checksum in
453 checksum_dict.iteritems())
454 # Write the checksum file back to disk
455 self._run_command('echo "%s" > %s' % (checksum_contents,
456 checksum_path))
457
458
459 def compute_checksum(self, pkg_path):
460 '''
461 Compute the MD5 checksum for the package file and return it.
462 pkg_path : The complete path for the package file
463 '''
464 md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
465 return md5sum_output.split()[0]
466
467
468 def update_checksum(self, pkg_path):
469 '''
470 Update the checksum of the package in the packages' checksum
471 file. This method is called whenever a package is fetched just
472 to be sure that the checksums in the local file are the latest.
473 pkg_path : The complete path to the package file.
474 '''
475 # Compute the new checksum
476 new_checksum = self.compute_checksum(pkg_path)
477 checksum_dict = self._get_checksum_dict()
478 checksum_dict[os.path.basename(pkg_path)] = new_checksum
479 self._save_checksum_dict(checksum_dict)
480
481
482 def remove_checksum(self, pkg_name):
483 '''
484 Remove the checksum of the package from the packages checksum file.
485 This method is called whenever a package is removed from the
486 repositories in order clean its corresponding checksum.
487 pkg_name : The name of the package to be removed
488 '''
489 checksum_dict = self._get_checksum_dict()
490 if pkg_name in checksum_dict:
491 del checksum_dict[pkg_name]
492 self._save_checksum_dict(checksum_dict)
493
494
495 def compare_checksum(self, pkg_path, repo_url):
496 '''
497 Calculate the checksum of the file specified in pkg_path and
498 compare it with the checksum in the checksum file
499 Return True if both match else return False.
500 pkg_path : The full path to the package file for which the
501 checksum is being compared
502 repo_url : The URL to fetch the checksum from
503 '''
504 checksum_dict = self._get_checksum_dict()
505 package_name = os.path.basename(pkg_path)
506 if not checksum_dict or package_name not in checksum_dict:
507 return False
508
509 repository_checksum = checksum_dict[package_name]
510 local_checksum = self.compute_checksum(pkg_path)
511 return (local_checksum == repository_checksum)
512
513
514 def tar_package(self, pkg_name, src_dir, dest_dir, exclude_dirs=None):
515 '''
516 Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
517 Excludes the directories specified in exclude_dirs while tarring
518 the source. Returns the tarball path.
519 '''
520 exclude_string = ''
521 if exclude_dirs:
522 exclude_string = " ".join('--exclude=%s/*' % ex_dir
523 for ex_dir in exclude_dirs)
524 # The '.' here is needed to zip the files in the current
525 # directory. We use '-C' for tar to change to the required
526 # directory i.e. src_dir and then zip up the files in that
527 # directory(which is '.') excluding the ones in the exclude_dirs
528 exclude_string += " ."
529
530 tarball_path = os.path.join(dest_dir, pkg_name)
531
532 utils.system("tar -cvjf %s -C %s %s "
533 % (tarball_path, src_dir, exclude_string))
534
535 return tarball_path
536
537
538 def untar_required(self, tarball_path, dest_dir):
539 '''
540 Compare the checksum of the tarball_path with the .checksum file
541 in the dest_dir and return False if it matches. The untar
542 of the package happens only if the checksums do not match.
543 '''
544 checksum_path = os.path.join(dest_dir, '.checksum')
545 try:
546 existing_checksum = self._run_command('cat ' + checksum_path).stdout
547 except (error.CmdError, error.AutoservRunError):
548 # If the .checksum file is not present (generally, this should
549 # not be the case) then return True so that the untar happens
550 return True
551
552 new_checksum = self.compute_checksum(tarball_path)
553 return (new_checksum.strip() != existing_checksum.strip())
554
555
556 def untar_pkg(self, tarball_path, dest_dir):
557 '''
558 Untar the package present in the tarball_path and put a
559 ".checksum" file in the dest_dir containing the checksum
560 of the tarball. This method
561 assumes that the package to be untarred is of the form
562 <name>.tar.bz2
563 '''
564 self._run_command('tar xvjf %s -C %s' % (tarball_path, dest_dir))
565 # Put the .checksum file in the install_dir to note
566 # where the package came from
567 pkg_checksum = self.compute_checksum(tarball_path)
568 pkg_checksum_path = os.path.join(dest_dir,
569 '.checksum')
570 self._run_command('echo "%s" > %s '
571 % (pkg_checksum, pkg_checksum_path))
572
573
574 def get_tarball_name(self, name, pkg_type):
575 return "%s-%s.tar.bz2" % (pkg_type, name)
576
577
578 def is_url(self, url):
579 """Return true if path looks like a URL"""
580 return url.startswith('http://')
581
582
583 def get_package_name(self, url, pkg_type):
584 '''
585 Extract the group and test name for the url. This method is currently
586 used only for tests.
587 '''
588 if pkg_type == 'test':
589 regex = '([^:]+://(.*)/([^/]*)$'
590 return self._get_package_name(url, regex)
591 else:
592 return ('', url)
593
594
595 def _get_package_name(self, url, regex):
596 if not self.is_url(url):
597 if url.endswith('.tar.bz2'):
598 testname = url.replace('.tar.bz2', '')
599 testname = re.sub(r'(\d*)\.', '', testname)
600 return (testname, testname)
601 else:
602 return ('', url)
603
604 match = re.match(regex, url)
605 if not match:
606 return ('', url)
607 group, filename = match.groups()
608 # Generate the group prefix.
609 group = re.sub(r'\W', '_', group)
610 # Drop the extension to get the raw test name.
611 testname = re.sub(r'\.tar\.bz2', '', filename)
612 # Drop any random numbers at the end of the test name if any
613 testname = re.sub(r'\.(\d*)', '', testname)
614 return (group, testname)
615
616
# site_packages.py may be non-existent or empty; make sure that an
# appropriate SitePackageManager class is created nevertheless.
try:
    from site_packages import SitePackageManager
except ImportError:
    class SitePackageManager(BasePackageManager):
        '''Fallback used when site_packages provides no SitePackageManager.'''
624
class PackageManager(SitePackageManager):
    '''SitePackageManager without any further changes.'''