| #!/usr/bin/python |
| # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """A library to assist automatically downloading files. |
| |
| This library is used by scripts that download tarballs, zipfiles, etc. as part |
| of the build process. |
| """ |
| |
| import hashlib |
| import http_download |
| import os.path |
| import re |
| import shutil |
| import sys |
| import time |
| import urllib2 |
| |
# Names of the stamp files kept in a stamp directory: SOURCE_STAMP holds the
# URL that was downloaded and HASH_STAMP holds the hash of the download.
SOURCE_STAMP = 'SOURCE_URL'
HASH_STAMP = 'SOURCE_SHA1'


# Canonical platform name for each accepted spelling.  Designed to handle more
# general inputs than sys.platform because the platform name may come from the
# command line.
PLATFORM_COLLAPSE = {
    'windows': 'windows', 'win32': 'windows', 'cygwin': 'windows',
    'linux': 'linux', 'linux2': 'linux', 'linux3': 'linux',
    'darwin': 'mac', 'mac': 'mac',
}

# Canonical architecture family for each accepted machine name.
ARCH_COLLAPSE = {
    'i386': 'x86', 'i686': 'x86', 'x86_64': 'x86',
    'armv7l': 'arm',
}
| |
| |
class HashError(Exception):
  """Raised when a downloaded file's hash differs from the expected hash.

  Attributes:
    download_url: url the file was downloaded from.
    expected_hash: hash the caller required.
    actual_hash: hash computed from the downloaded file.
  """

  def __init__(self, download_url, expected_hash, actual_hash):
    # Forward the values to Exception so that `args` is populated.  Without
    # this, copying or unpickling the exception fails because it is rebuilt
    # by calling the class with `args`.
    super(HashError, self).__init__(download_url, expected_hash, actual_hash)
    self.download_url = download_url
    self.expected_hash = expected_hash
    self.actual_hash = actual_hash

  def __str__(self):
    return 'Got hash "%s" but expected hash "%s" for "%s"' % (
        self.actual_hash, self.expected_hash, self.download_url)
| |
| |
def PlatformName(name=None):
  """Return the canonical platform name for `name`.

  Arguments:
    name: platform name to canonicalize; sys.platform is used when None.
  Returns:
    The matching PLATFORM_COLLAPSE value.
  Raises:
    KeyError: if the name is not a key of PLATFORM_COLLAPSE.
  """
  key = sys.platform if name is None else name
  return PLATFORM_COLLAPSE[key]
| |
def ArchName(name=None):
  """Return the canonical architecture name for `name`.

  Arguments:
    name: architecture name to canonicalize. When None the architecture is
          detected: 'i386' is assumed on Windows and platform.machine() is
          used everywhere else.
  Returns:
    The matching ARCH_COLLAPSE value.
  Raises:
    KeyError: if the name is not a key of ARCH_COLLAPSE.
  """
  if name is not None:
    return ARCH_COLLAPSE[name]

  if PlatformName() == 'windows':
    # TODO(pdox): Figure out how to auto-detect 32-bit vs 64-bit Windows.
    detected = 'i386'
  else:
    # Imported only on the path that needs it.
    import platform
    detected = platform.machine()
  return ARCH_COLLAPSE[detected]
| |
def EnsureFileCanBeWritten(filename):
  """Create the directory that will contain `filename` if it is missing.

  Arguments:
    filename: path of the file that is about to be written.
  Raises:
    OSError: if the directory cannot be created.
  """
  directory = os.path.dirname(filename)
  # A bare filename has no directory component; there is nothing to create
  # and os.makedirs('') would raise.
  if not directory or os.path.exists(directory):
    return
  try:
    os.makedirs(directory)
  except OSError:
    # Another process may have created the directory between the check and
    # the call; only report the failure if it is still missing.
    if not os.path.isdir(directory):
      raise
| |
| |
def WriteData(filename, data):
  """Write `data` to `filename` in binary mode, replacing any existing file.

  The containing directory is created first if necessary.

  Arguments:
    filename: path of the file to write.
    data: the bytes to write.
  """
  EnsureFileCanBeWritten(filename)
  # The with-statement closes the handle even when write() raises.
  with open(filename, 'wb') as f:
    f.write(data)
| |
| |
def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
  """Copy everything readable from `stream` into `filename`.

  The containing directory is created first if necessary.

  Arguments:
    filename: path of the file to write (opened in binary mode).
    stream: object with a read(size) method; an empty result ends the copy.
    chunk_size: number of bytes requested from the stream per read.
    verbose: if True, print one '.' per chunk written and a final newline.
  """
  EnsureFileCanBeWritten(filename)
  out_file = open(filename, 'wb')
  try:
    chunk = stream.read(chunk_size)
    while len(chunk) > 0:
      out_file.write(chunk)
      if verbose:
        # Indicate that we're still writing.
        sys.stdout.write('.')
        sys.stdout.flush()
      chunk = stream.read(chunk_size)
  finally:
    # Terminate the progress line before closing, even on failure.
    if verbose:
      sys.stdout.write('\n')
    out_file.close()
| |
| |
def DoesStampMatch(stampfile, expected, index):
  """Check whether a stamp file records the expected value.

  Arguments:
    stampfile: path of the stamp file to read.
    expected: the value the stamp should contain.
    index: index of the line within the stamp file to compare.
  Returns:
    "already up-to-date." if the line at `index` equals `expected`,
    "manual override." if it does not but the stamp starts with 'manual',
    False otherwise, including when the stamp file cannot be read or has no
    line at `index`.
  """
  try:
    # The with-statement closes the handle even when read() raises.
    with open(stampfile, 'r') as f:
      stamp = f.read()
  except IOError:
    return False

  try:
    line_matches = stamp.split('\n')[index] == expected
  except IndexError:
    # A stamp with fewer lines than expected is a mismatch, not an error; the
    # manual override below must still be honoured.
    line_matches = False

  if line_matches:
    return "already up-to-date."
  if stamp.startswith('manual'):
    return "manual override."
  return False
| |
| |
def WriteStamp(stampfile, data):
  """Write `data` to the stamp file, replacing any previous contents.

  The containing directory is created first if necessary.

  Arguments:
    stampfile: path of the stamp file to write (opened in text mode).
    data: the string to store in the stamp.
  """
  EnsureFileCanBeWritten(stampfile)
  # The with-statement closes the handle even when write() raises.
  with open(stampfile, 'w') as f:
    f.write(data)
| |
| |
def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
  """Check whether a stamp in `path` is fresh and records the expected value.

  Arguments:
    path: directory containing the stamp file.
    stamp_name: file name of the stamp within `path`.
    stamp_contents: the value the stamp should contain.
    min_time: if set, a stamp whose modification time is not later than this
              is treated as stale.
    index: index of the line within the stamp file to compare.
  Returns:
    False if the stamp is stale or cannot be stat'ed, otherwise the result of
    DoesStampMatch for the stamp file.
  """
  stampfile = os.path.join(path, stamp_name)

  # Reject a stampfile that is older than the minimum last mod time.
  if min_time:
    try:
      modified = os.path.getmtime(stampfile)
    except OSError:
      return False
    if modified <= min_time:
      return False

  return DoesStampMatch(stampfile, stamp_contents, index)
| |
| |
def WriteSourceStamp(path, url):
  """Record `url` in the SOURCE_STAMP file inside directory `path`."""
  WriteStamp(os.path.join(path, SOURCE_STAMP), url)
| |
def WriteHashStamp(path, hash_val):
  """Record `hash_val` in the HASH_STAMP file inside directory `path`."""
  WriteStamp(os.path.join(path, HASH_STAMP), hash_val)
| |
| |
def Retry(op, *args):
  """Call op(*args), retrying with backoff on Windows and Cygwin.

  On other platforms the operation is attempted exactly once. On Windows and
  Cygwin a failing operation is attempted up to five times in total, sleeping
  2**n seconds after the n-th failure; the last failure is re-raised.

  Arguments:
    op: the callable to invoke; its __name__ is used in the log output.
    *args: positional arguments passed to op.
  """
  if sys.platform not in ('win32', 'cygwin'):
    op(*args)
    return

  # Windows seems to be prone to having commands that delete files or
  # directories fail.  We currently do not have a complete understanding why,
  # and as a workaround we simply retry the command a few times.
  # It appears that file locks are hanging around longer than they should.  This
  # may be a secondary effect of processes hanging around longer than they
  # should.  This may be because when we kill a browser sel_ldr does not exit
  # immediately, etc.
  # Virus checkers can also accidentally prevent files from being deleted, but
  # that shouldn't be a problem on the bots.
  failures = 0
  while True:
    try:
      op(*args)
    except Exception:
      sys.stdout.write("FAILED: %s %s\n" % (op.__name__, repr(args)))
      failures += 1
      if failures >= 5:
        # Don't mask the exception.
        raise
      sys.stdout.write("RETRY: %s %s\n" % (op.__name__, repr(args)))
      time.sleep(2 ** failures)
    else:
      return
| |
| |
def MoveDirCleanly(src, dst):
  """Move `src` to `dst`, removing anything already at `dst` first."""
  RemoveDir(dst)
  MoveDir(src, dst)
| |
| |
def MoveDir(src, dst):
  """Move `src` to `dst` with shutil.move, going through Retry."""
  Retry(shutil.move, src, dst)
| |
| |
def RemoveDir(path):
  """Remove the directory tree at `path` if it exists, going through Retry."""
  if not os.path.exists(path):
    return
  Retry(shutil.rmtree, path)
| |
| |
def RemoveFile(path):
  """Delete the file at `path` if it exists, going through Retry."""
  if not os.path.exists(path):
    return
  Retry(os.unlink, path)
| |
| |
def _HashFileHandle(fh):
  """Compute the sha1 of everything readable from a file like object.

  The handle is read in 4096-byte chunks and is always closed before this
  function returns or raises.

  Arguments:
    fh: file handle like object to hash.
  Returns:
    sha1 hex digest as a string.
  """
  digest = hashlib.sha1()
  try:
    chunk = fh.read(4096)
    while chunk:
      digest.update(chunk)
      chunk = fh.read(4096)
  finally:
    fh.close()
  return digest.hexdigest()


def HashFile(filename):
  """Compute the sha1 of a file on disk.

  Arguments:
    filename: path of the file to hash; it is opened in binary mode.
  Returns:
    sha1 hex digest as a string.
  """
  return _HashFileHandle(open(filename, 'rb'))
| |
| |
def HashUrlByDownloading(url):
  """Compute the sha1 of the data at a url by downloading all of it.

  Arguments:
    url: url to download from.
  Returns:
    sha1 hex digest of the data at the url.
  """
  try:
    response = urllib2.urlopen(url)
  except:
    # Deliberately catches everything: the failure is only logged here and
    # the original exception is always re-raised.
    sys.stderr.write("Failed fetching URL: %s\n" % url)
    raise
  else:
    return _HashFileHandle(response)
| |
| |
def HashUrl(url):
  """Return the SHA1 hash of the data at a url.

  Looks first for an adjacent file with a ".sha1hash" suffix, which saves
  having to download a large tarball just to get its hash. If that file does
  not exist (HTTP 404), falls back to downloading the main file and hashing
  it.

  Arguments:
    url: url of the file whose hash is wanted.
  Returns:
    sha1 hex digest as a string.
  Raises:
    urllib2.HTTPError: if fetching the hash file fails with a status other
        than 404.
    AssertionError: if the hash file does not hold a 40-digit lowercase hex
        hash.
  """
  hash_url = '%s.sha1hash' % url
  try:
    fh = urllib2.urlopen(hash_url)
    try:
      # A valid hash file is 40 hex digits plus an optional newline, so 100
      # bytes is more than enough.
      data = fh.read(100)
    finally:
      # Close the response even when read() raises.
      fh.close()
  except urllib2.HTTPError as exn:
    if exn.code == 404:
      return HashUrlByDownloading(url)
    raise

  if not re.match('[0-9a-f]{40}\n?$', data):
    raise AssertionError('Bad SHA1 hash file: %r' % data)
  return data.strip()
| |
| |
def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
            hash_val=None, keep=False, verbose=False, stamp_index=0):
  """Synchronize a destination file with a URL.

  If the URL does not match the URL stamp (and the expected hash does not
  match the hash stamp), then we must re-download it.

  Arguments:
    url: the url to compare against the stamp and to download from.
    filename: the file to create on download. Required despite the default.
    stamp_dir: the directory containing the URL and hash stamps to check
               against. When None the file is always downloaded.
    min_time: if set, stamps whose modification time is not later than this
              are treated as stale.
    hash_val: if set, the expected hash which must be matched.
    keep: if True, the stamps are only trusted when `filename` already exists
          as a regular file.
    verbose: prints out status as it runs.
    stamp_index: index of the line within the hash stamp file to check.
  Returns:
    True if the file is replaced
    False if the file is not replaced
  Raises:
    HashError: if the hash of the downloaded file does not match hash_val.
  """

  assert url and filename

  # If we are not keeping the tarball, or we already have it, we can
  # skip downloading it for this reason. If we are keeping it,
  # it must exist.
  tarball_ok = os.path.isfile(filename) if keep else True

  # If we don't need the tarball and the stamp_file matches the url, then
  # we must be up to date.  If the URL differs but the recorded hash matches
  # the one we'll insist the tarball has, then that's good enough too.
  # TODO(mcgrathr): Download the .sha1sum file first to compare with
  # the cached hash, in case --file-hash options weren't used.
  if tarball_ok and stamp_dir is not None:
    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
      if verbose:
        print('%s is already up to date.' % filename)
      return False
    if (hash_val is not None and
        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time,
                       stamp_index)):
      if verbose:
        print('%s is identical to the up to date file.' % filename)
      return False

  if verbose:
    print('Updating %s\n\tfrom %s.' % (filename, url))
  EnsureFileCanBeWritten(filename)
  http_download.HttpDownload(url, filename)

  # Verify the download against the expected hash, when one was given.
  if hash_val:
    tar_hash = HashFile(filename)
    if hash_val != tar_hash:
      raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
                      download_url=url)

  return True