blob: e36ce99b1b3a4b3a6e204c0a60397eb10a94bc86 [file] [log] [blame]
# Copyright (c) 2015, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15"""Extracts archives."""
16
17
David Benjamin1b249672016-12-06 18:25:50 -050018import hashlib
Adam Langleye9ada862015-05-11 17:20:37 -070019import optparse
20import os
21import os.path
22import tarfile
23import shutil
24import sys
25import zipfile
26
27
def CheckedJoin(output, path):
  """
  CheckedJoin returns os.path.join(output, path). It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  path is normalized first, so interior '..' components that stay inside the
  tree (e.g. 'a/../b') are accepted. Names that merely begin with a dot, such
  as '.gitignore', are accepted as well.

  Raises ValueError(path) if the normalized path is absolute, carries a drive
  prefix, starts at a root, refers to output itself ('.'), or climbs out of
  output via a leading '..' component.
  """
  path = os.path.normpath(path)
  # os.path.join discards output when path is absolute or drive-qualified, so
  # both forms would escape the output directory.
  if os.path.isabs(path) or os.path.splitdrive(path)[0]:
    raise ValueError(path)
  # After normalization a relative path can only contain '.' as the whole path
  # and '..' as leading components, so inspecting the first component is
  # enough. An empty first component means the path starts with a separator.
  first = path.split(os.sep, 1)[0]
  if not first or first in (os.curdir, os.pardir):
    raise ValueError(path)
  return os.path.join(output, path)
38
39
def IterateZip(path):
  """
  IterateZip lazily walks the zip archive at path, yielding one
  (filename, mode, fileobj) tuple per file member. Members whose name ends in
  '/' (directories) are skipped. mode is always None.
  """
  with zipfile.ZipFile(path, 'r') as archive:
    for member in archive.infolist():
      is_directory = member.filename.endswith('/')
      if not is_directory:
        yield (member.filename, None, archive.open(member))
50
51
def IterateTar(path):
  """
  IterateTar lazily walks the gzip-compressed tar archive at path, yielding
  one (filename, mode, fileobj) tuple per regular file. Directory members are
  skipped; any other member type raises ValueError.
  """
  with tarfile.open(path, 'r:gz') as archive:
    for member in archive:
      if member.isdir():
        continue
      if member.isfile():
        yield (member.name, member.mode, archive.extractfile(member))
      else:
        # Neither a directory nor a regular file (e.g. a link or device).
        raise ValueError('Unknown entry type "%s"' % (member.name, ))
64
65
def _FileDigest(path):
  """
  _FileDigest returns the hex-encoded SHA-256 digest of the file at path,
  reading it in 1 MiB chunks so the whole archive is never held in memory.
  """
  sha256 = hashlib.sha256()
  # Archives are binary data. Text mode would translate line endings on
  # Windows and cannot be hashed at all on Python 3.
  with open(path, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha256.update(chunk)
  return sha256.hexdigest()


def main(args):
  """
  main extracts an archive into an output directory.

  args is the command line without the program name: an optional --no-prefix
  flag followed by ARCHIVE and OUTPUT. ARCHIVE must end in '.zip' or
  '.tar.gz'. Unless --no-prefix is given, every entry must live under one
  common top-level directory, which is stripped on extraction.

  The SHA-256 digest of the archive is stored in
  OUTPUT/.boringssl_archive_digest. If that stamp already matches, nothing is
  done. Otherwise any existing OUTPUT is removed and re-created from the
  archive.

  Returns 1 after printing usage if the argument count is wrong, and 0
  otherwise (including when ARCHIVE does not exist or OUTPUT is up to date).
  Raises ValueError for an unsupported archive extension or a malformed entry
  path.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  digest = _FileDigest(archive)

  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive)
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for path, mode, inp in entries:
      try:
        # Even on Windows, zip files must always use forward slashes.
        if '\\' in path or path.startswith('/'):
          raise ValueError(path)

        if not options.no_prefix:
          # An entry outside any top-level directory has no prefix to strip.
          if '/' not in path:
            raise ValueError(path)
          new_prefix, rest = path.split('/', 1)

          # Ensure the archive is consistent.
          if prefix is None:
            prefix = new_prefix
          if prefix != new_prefix:
            raise ValueError((prefix, new_prefix))
        else:
          rest = path

        # Extract the file into the output directory.
        fixed_path = CheckedJoin(output, rest)
        if not os.path.isdir(os.path.dirname(fixed_path)):
          os.makedirs(os.path.dirname(fixed_path))
        with open(fixed_path, 'wb') as out:
          shutil.copyfileobj(inp, out)
      finally:
        # Release the per-entry handle instead of waiting for it to be
        # garbage collected.
        inp.close()

      # Fix up permissions if need be.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if mode is not None:
        os.chmod(fixed_path, mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    # Closing the generator unwinds its 'with' block and closes the archive.
    entries.close()

  # An archive with no file entries never creates the output directory; make
  # sure it exists so the stamp can be written.
  if not os.path.isdir(output):
    os.makedirs(output)
  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
154
155
if __name__ == '__main__':
  # Run as a script: forward the command-line arguments (without the program
  # name) to main and use its return value as the process exit status.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)