blob: f0198c5a408471fbeb76b80683a9c8a37f77d66a [file] [log] [blame]
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14"""Functions to process archive files."""
15
import os
import tarfile
import tempfile
import urllib.parse
import urllib.request
import zipfile
21
22
class ZipFileWithPermission(zipfile.ZipFile):
    """ZipFile variant that re-applies the file mode recorded in the archive.

    The stock extraction path does not apply the mode stored for each entry,
    so this subclass calls os.chmod on every extracted member afterwards.

    See https://bugs.python.org/issue15795
    """

    def _extract_member(self, member, targetpath, pwd):
        """Extracts one member, then applies its recorded mode, if any.

        Args:
            member: A zipfile.ZipInfo, or the name of an archive member.
            targetpath: Directory to extract the member into.
            pwd: Password for encrypted members, passed to the base class.

        Returns:
            The path returned by zipfile.ZipFile._extract_member.
        """
        extracted_path = super()._extract_member(member, targetpath, pwd)

        # Callers may pass either a member name or a ZipInfo object.
        info = (member if isinstance(member, zipfile.ZipInfo)
                else self.getinfo(member))

        # The upper 16 bits of external_attr are used as the file mode. A
        # value of zero means there is nothing to apply, so the mode chosen
        # by the base class is kept.
        mode = info.external_attr >> 16
        if mode:
            os.chmod(extracted_path, mode)
        return extracted_path
38
39
def unzip(archive_path, target_path):
    """Unpacks a zip archive into a directory.

    Extraction goes through ZipFileWithPermission, so the file mode recorded
    for each entry is applied to the extracted file.

    Args:
        archive_path: Path to the zip file.
        target_path: Path to extract files to.
    """
    with ZipFileWithPermission(archive_path) as archive:
        archive.extractall(path=target_path)
50
51
def untar(archive_path, target_path):
    """Extracts tar file to a path.

    The archive is opened with mode 'r', which lets tarfile detect the
    compression itself. Members that would be written outside target_path
    are rejected instead of extracted, because the archive usually comes
    from a download and must not be trusted.

    Args:
        archive_path: Path to the tar file.
        target_path: Path to extract files to.

    Raises:
        tarfile.TarError: If the archive cannot be read, or if a member
            would be extracted outside of target_path.
    """
    with tarfile.open(archive_path, mode='r') as tfile:
        if hasattr(tarfile, 'data_filter'):
            # Extraction filters (PEP 706) exist on this interpreter. The
            # 'data' filter refuses absolute names, '..' escapes and links
            # pointing outside the destination. Passing it explicitly also
            # silences the Python 3.12/3.13 DeprecationWarning.
            tfile.extractall(target_path, filter='data')
            return

        # Older interpreter without extraction filters: perform a
        # best-effort check of the member names before extracting anything.
        root = os.path.realpath(target_path)
        for member in tfile.getmembers():
            destination = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, destination]) != root:
                raise tarfile.TarError(
                    '{!r} would be extracted outside of {!r}'.format(
                        member.name, target_path))
        tfile.extractall(target_path)
62
63
# Maps a supported archive file name suffix to the function that extracts it.
# Every compressed tar flavour shares the same extractor.
ARCHIVE_TYPES = {
    '.zip': unzip,
    **dict.fromkeys(('.tar.gz', '.tar.bz2', '.tar.xz'), untar),
}
70
71
def is_supported_archive(url):
    """Tells whether an extract function is known for the given url.

    Args:
        url: The url to the archive file.

    Returns:
        True if get_extract_func finds an extractor for the url, False
        otherwise.
    """
    extractor = get_extract_func(url)
    return extractor is not None
75
76
def get_extract_func(url):
    """Looks up the extractor matching the file name in a url.

    Only the path component of the url is considered, so query strings and
    fragments do not take part in the suffix match.

    Args:
        url: The url to the archive file.

    Returns:
        A function to extract the archive. None if not found.
    """
    name = os.path.basename(urllib.parse.urlparse(url).path)
    matches = (extract for suffix, extract in ARCHIVE_TYPES.items()
               if name.endswith(suffix))
    # The first suffix that matches wins; None signals an unsupported file.
    return next(matches, None)
93
94
def download_and_extract(url):
    """Downloads and extracts an archive file to a temporary directory.

    The extract function is resolved before anything is downloaded, so an
    unsupported url fails immediately instead of after the transfer.

    Args:
        url: Url to download.

    Returns:
        Path to the temporary directory. The directory is created with
        tempfile.mkdtemp and is not removed by this function.

    Raises:
        ValueError: If the url does not point to a supported archive type.
    """
    extract_func = get_extract_func(url)
    if extract_func is None:
        raise ValueError('Unsupported archive type: {}'.format(url))

    print('Downloading {}'.format(url))
    # urlretrieve returns the local file name and the response headers; only
    # the file name is needed here.
    archive_file, _headers = urllib.request.urlretrieve(url)

    temporary_dir = tempfile.mkdtemp()
    print('Extracting {} to {}'.format(archive_file, temporary_dir))
    extract_func(archive_file, temporary_dir)

    return temporary_dir
113
114
def find_archive_root(path):
    """Guesses the real top-level directory of an extracted archive.

    Archives sometimes wrap their content in extra layers of directories.
    The tree is walked top-down and the first directory that contains a file
    or more than one sub-directory is taken as the root.

    Args:
        path: Path to the extracted archive.

    Returns:
        The root path we found, or path itself when no directory qualifies.
    """
    candidates = (
        current
        for current, subdirs, filenames in os.walk(path)
        if filenames or len(subdirs) > 1
    )
    # The generator is lazy, so the walk stops at the first match.
    return next(candidates, path)
130 return path