blob: 4680cfe3ca42cd0844611b1e945f276e02ee4a57 [file] [log] [blame]
# Copyright (c) 2015, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15"""Extracts archives."""
16
17
David Benjamin1b249672016-12-06 18:25:50 -050018import hashlib
Adam Langleye9ada862015-05-11 17:20:37 -070019import optparse
20import os
21import os.path
22import tarfile
23import shutil
24import sys
25import zipfile
26
27
def CheckedJoin(output, path):
  """
  CheckedJoin returns os.path.join(output, path). It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  path is normalized with os.path.normpath first. ValueError, carrying the
  normalized path, is raised if the path is absolute or rooted, names a drive,
  is empty or ".", or climbs out of output through a leading "..". Names that
  merely begin with a dot, such as ".gitignore", are accepted.
  """
  path = os.path.normpath(path)
  drive = os.path.splitdrive(path)[0]
  # After normpath, any escape from output shows up as a leading ".."
  # component, an empty or no-op path collapses to ".", and a rooted path has
  # an empty first component. Compare the whole first component rather than a
  # one-character prefix so ordinary dotfiles are not rejected.
  first_component = path.split(os.sep, 1)[0]
  if (drive or os.path.isabs(path) or
      first_component in ('', os.curdir, os.pardir)):
    raise ValueError(path)
  return os.path.join(output, path)
38
39
class FileEntry(object):
  """
  FileEntry describes one regular file found in an archive.

  path is the entry's name inside the archive, mode is its permission bits (or
  None when the archive format does not supply them), and fileobj is a
  file-like object from which the entry's contents can be read.
  """

  def __init__(self, path, mode, fileobj):
    self.path, self.mode, self.fileobj = path, mode, fileobj
45
46
class SymlinkEntry(object):
  """
  SymlinkEntry describes one symbolic link found in an archive.

  path is the link's name inside the archive, mode is its permission bits (or
  None when they should not be applied), and target is the path the link
  points at.
  """

  def __init__(self, path, mode, target):
    self.path, self.mode, self.target = path, mode, target
52
53
def IterateZip(path):
  """
  IterateZip is a generator over the files stored in the zip archive at path.

  Every member whose name does not end in '/' is yielded as a FileEntry with a
  mode of None and an open file object for the member's contents. Members whose
  names end in '/' (directories) are skipped. The archive stays open until the
  generator is exhausted or closed.
  """
  with zipfile.ZipFile(path, 'r') as archive:
    for member in archive.infolist():
      is_directory = member.filename.endswith('/')
      if not is_directory:
        yield FileEntry(member.filename, None, archive.open(member))
Adam Langleye9ada862015-05-11 17:20:37 -070064
65
def IterateTar(path, compression):
  """
  IterateTar is a generator over the members of the compressed tar archive at
  path. compression is the tarfile mode suffix, e.g. 'gz' or 'bz2'.

  Directories are skipped. Symbolic links are yielded as SymlinkEntry objects
  with a mode of None, and regular files as FileEntry objects carrying the
  member's mode and a file object for its contents. Any other kind of member
  raises ValueError.
  """
  open_mode = 'r:' + compression
  with tarfile.open(path, open_mode) as archive:
    for member in archive:
      if member.isdir():
        continue
      if member.issym():
        yield SymlinkEntry(member.name, None, member.linkname)
        continue
      if not member.isfile():
        raise ValueError('Unknown entry type "%s"' % (member.name, ))
      yield FileEntry(member.name, member.mode, archive.extractfile(member))
Adam Langleye9ada862015-05-11 17:20:37 -070081
82
def main(args):
  """
  main extracts an archive into an output directory.

  args is the command line without the program name: optional flags followed
  by the ARCHIVE path and the OUTPUT directory. If ARCHIVE does not exist,
  nothing is done. If OUTPUT already contains a stamp file holding ARCHIVE's
  SHA-256 digest, nothing is done either. Otherwise OUTPUT is removed, every
  entry of the archive is extracted into it, and the stamp file is rewritten.

  Unless --no-prefix is given, every entry must live under one common leading
  directory, which is stripped from the extracted paths.

  Returns 0 on success (including both skipped cases) and 1 when the wrong
  number of arguments is given. Raises ValueError if the archive has an
  unsupported extension or contains a path that is absolute, uses backslashes,
  lacks the common prefix, or disagrees with it. Raises TypeError if an entry
  is neither a FileEntry nor a SymlinkEntry.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  # Hash the raw bytes. Archives are binary data, so the file must be opened
  # in binary mode: text mode translates newlines on Windows and cannot be fed
  # to hashlib on Python 3.
  sha256 = hashlib.sha256()
  with open(archive, 'rb') as f:
    while True:
      chunk = f.read(1024 * 1024)
      if not chunk:
        break
      sha256.update(chunk)
  digest = sha256.hexdigest()

  # The stamp records which archive the output directory was extracted from.
  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive, 'gz')
  elif archive.endswith('.tar.bz2'):
    entries = IterateTar(archive, 'bz2')
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for entry in entries:
      # Even on Windows, zip files must always use forward slashes.
      if '\\' in entry.path or entry.path.startswith('/'):
        raise ValueError(entry.path)

      if not options.no_prefix:
        new_prefix, separator, rest = entry.path.partition('/')
        if not separator:
          # The entry sits at the top level, so there is no prefix to strip.
          raise ValueError(entry.path)

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))
      else:
        rest = entry.path

      # Extract the file into the output directory.
      fixed_path = CheckedJoin(output, rest)
      parent = os.path.dirname(fixed_path)
      if not os.path.isdir(parent):
        os.makedirs(parent)
      if isinstance(entry, FileEntry):
        try:
          with open(fixed_path, 'wb') as out:
            shutil.copyfileobj(entry.fileobj, out)
        finally:
          # Release the per-entry reader as soon as its contents are copied.
          entry.fileobj.close()
      elif isinstance(entry, SymlinkEntry):
        os.symlink(entry.target, fixed_path)
      else:
        raise TypeError('unknown entry type')

      # Fix up permissions if need be.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if entry.mode is not None:
        os.chmod(fixed_path, entry.mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    entries.close()

  # An archive without any files never creates the output directory, so make
  # sure it exists before recording the digest.
  if not os.path.isdir(output):
    os.makedirs(output)
  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
178
179
# Script entry point: run main on the command-line arguments (without the
# program name) and use its return value as the process exit status.
if __name__ == '__main__':
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)