blob: 3c02776a40655159ee47b45ce019c02eafb99681 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020013
14try:
15 import zlib
16 del zlib
17 _ZLIB_SUPPORTED = True
18except ImportError:
19 _ZLIB_SUPPORTED = False
Tarek Ziadé396fad72010-02-23 05:30:31 +000020
21try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000022 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010023 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000024 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040025except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000026 _BZ2_SUPPORTED = False
27
28try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030029 import lzma
30 del lzma
31 _LZMA_SUPPORTED = True
32except ImportError:
33 _LZMA_SUPPORTED = False
34
35try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000036 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040037except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000038 getpwnam = None
39
40try:
41 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040042except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000043 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000044
# Names exported by a star-import of this module.  ReadError and
# RegistryError are defined in this file but are not listed here.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which", "get_terminal_size",
           "SameFileError"]
           # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000054
class Error(OSError):
    """Base error raised by the high-level file operations in this module.

    copytree() raises it with a list of (src, dst, reason) tuples as its
    first argument; move() raises it with a plain message.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000057
class SameFileError(Error):
    """Raised when the source and the destination of a copy are one file."""
60
class SpecialFileError(OSError):
    """Raised when an operation (such as copying) is attempted on a special
    file (such as a named pipe) that does not support it."""
64
class ExecError(OSError):
    """Raised when a command cannot be executed."""
67
class ReadError(OSError):
    """Raised when an archive cannot be read."""
70
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000074
75
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy everything readable from file-like object fsrc into fdst.

    Data is moved in pieces obtained from fsrc.read(length); copying stops
    at the first read that returns an empty (falsy) result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
83
def _samefile(src, dst):
    """Return True if src and dst appear to name the same file.

    Uses os.path.samefile() where the platform provides it (an OSError from
    it counts as "not the same"); otherwise compares the normalised absolute
    path names.
    """
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # All other platforms: check for same pathname.
        norm_src = os.path.normcase(os.path.abspath(src))
        norm_dst = os.path.normcase(os.path.abspath(dst))
        return norm_src == norm_dst

    # Macintosh, Unix.
    try:
        return samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000095
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the contents of src to dst and return dst.

    When follow_symlinks is false and src is a symbolic link, dst becomes a
    new symlink with the same target instead of a copy of the linked file.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either one is a named pipe.
    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            path_stat = os.stat(path)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(path_stat.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if not follow_symlinks and os.path.islink(src):
        # Recreate the link itself rather than copying what it points to.
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000124
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    With follow_symlinks false the links themselves are used, but only when
    both src and dst are symlinks; in that case nothing happens on
    platforms without os.lchmod (e.g. Linux).  Nothing happens either when
    os.chmod is unavailable.
    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        if not hasattr(os, 'lchmod'):
            return
        read_stat, apply_mode = os.lstat, os.lchmod
    else:
        if not hasattr(os, 'chmod'):
            return
        read_stat, apply_mode = os.stat, os.chmod

    src_stat = read_stat(src)
    apply_mode(dst, stat.S_IMODE(src_stat.st_mode))
145
if not hasattr(os, 'listxattr'):
    def _copyxattr(*args, **kwargs):
        """Do nothing: os.listxattr is not available on this platform."""
else:
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Attributes already present on `dst` are overwritten.  Symlinks are
        not followed when `follow_symlinks` is false.  ENOTSUP/ENODATA
        while listing ends the copy quietly; EPERM/ENOTSUP/ENODATA on a
        single attribute skips just that attribute.
        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise

        tolerated = (errno.EPERM, errno.ENOTSUP, errno.ENODATA)
        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                if exc.errno not in tolerated:
                    raise
172
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied as well, via _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks.

    """
    # Stand-in for any os function that is missing (or, in no-follow mode,
    # cannot honour follow_symlinks): accepts the same call and does nothing.
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))
    if follow:
        # use the real function if it exists
        def lookup(name):
            return getattr(os, name, _nop)
    else:
        # use the real function only if it exists
        # *and* it supports follow_symlinks
        def lookup(name):
            fn = getattr(os, name, _nop)
            if fn in os.supports_follow_symlinks:
                return fn
            return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    # Timestamps are copied with nanosecond resolution.
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
        follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # if we got a NotImplementedError, it's because
        #   * follow_symlinks=False,
        #   * lchmod() is unavailable, and
        #   * either
        #       * fchmodat() is unavailable or
        #       * fchmodat() doesn't implement AT_SYMLINK_NOFOLLOW.
        #         (it returned ENOTSUP.)
        # therefore we're out of options--we simply cannot chmod the
        # symlink.  give up, suppress the error.
        # (which is what shutil always did in this circumstance.)
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Ignore "operation not supported" (under either errno name the
            # platform defines); re-raise any other failure via the
            # for/else below.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200226
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst") and return the destination path.

    If dst is a directory, the file is placed inside it under the base name
    of src.

    If follow_symlinks is false, symlinks won't be followed.  This
    resembles GNU's "cp -P src dst".

    A SameFileError is raised if source and destination are the same file.
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000244
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst") and return the
    destination path.

    If dst is a directory, the file is placed inside it under the base name
    of src.

    If follow_symlinks is false, symlinks won't be followed.  This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000260
def ignore_patterns(*patterns):
    """Build a callable suitable for copytree()'s `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them.
    """
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
272
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including directories reached through symlinks. Notice that this has
    no effect on platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.
                    # os.path.exists() follows the link, so testing srcname
                    # resolves a relative target against the link's own
                    # directory instead of the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # Otherwise let the copy occur; copy_function will
                    # raise an error for a dangling link.
                    if os.path.isdir(srcname):
                        # Pass the flag on so it also holds inside
                        # directories reached through a symlink.
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000361
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree at `path` using path-based os calls.

    Every failure is reported as onerror(func, path, exc_info), where func
    is the os function associated with the failed step and exc_info comes
    from sys.exc_info(); processing continues if onerror returns.
    """
    try:
        with os.scandir(path) as scandir_it:
            entries = list(scandir_it)
    except OSError:
        onerror(os.scandir, path, sys.exc_info())
        # Nothing could be listed; still fall through to the rmdir below.
        entries = []
    for entry in entries:
        fullname = entry.path
        try:
            is_dir = entry.is_dir(follow_symlinks=False)
        except OSError:
            # If the type cannot be determined, treat it as a non-directory
            # so it is handed to os.unlink below.
            is_dir = False
        if is_dir:
            try:
                if entry.is_symlink():
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # os.scandir or entry.is_dir above.
                    raise OSError("Cannot call rmtree on a symbolic link")
            except OSError:
                # The raise above exists so that sys.exc_info() carries a
                # real exception for the handler.
                onerror(os.path.islink, fullname, sys.exc_info())
                continue
            _rmtree_unsafe(fullname, onerror)
        else:
            try:
                os.unlink(fullname)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000396
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove the contents of the directory open as descriptor `topfd`.

    `path` is used only to build the names passed to onerror; all
    filesystem calls go through `topfd` (dir_fd=...).  The directory itself
    is not removed here: each level removes its subdirectories after
    recursing into them.  Failures are reported as
    onerror(func, fullname, exc_info).
    """
    try:
        with os.scandir(topfd) as scandir_it:
            entries = list(scandir_it)
    except OSError as err:
        # Make the exception name the path rather than the descriptor.
        err.filename = path
        onerror(os.scandir, path, sys.exc_info())
        return
    for entry in entries:
        fullname = os.path.join(path, entry.name)
        try:
            is_dir = entry.is_dir(follow_symlinks=False)
            if is_dir:
                # Keep the stat result so the directory opened below can be
                # checked to be this same object.
                orig_st = entry.stat(follow_symlinks=False)
                is_dir = stat.S_ISDIR(orig_st.st_mode)
        except OSError:
            is_dir = False
        if is_dir:
            try:
                dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
            except OSError:
                onerror(os.open, fullname, sys.exc_info())
            else:
                try:
                    # Recurse only if what was opened is what was stat'ed.
                    if os.path.samestat(orig_st, os.fstat(dirfd)):
                        _rmtree_safe_fd(dirfd, fullname, onerror)
                        try:
                            os.rmdir(entry.name, dir_fd=topfd)
                        except OSError:
                            onerror(os.rmdir, fullname, sys.exc_info())
                    else:
                        try:
                            # This can only happen if someone replaces
                            # a directory with a symlink after the call to
                            # os.scandir or stat.S_ISDIR above.
                            raise OSError("Cannot call rmtree on a symbolic "
                                          "link")
                        except OSError:
                            onerror(os.path.islink, fullname, sys.exc_info())
                finally:
                    # Always release the descriptor, whatever onerror did.
                    os.close(dirfd)
        else:
            try:
                os.unlink(entry.name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200444
# True when the platform offers everything the fd-based rmtree needs:
# dir_fd support in open/stat/unlink/rmdir, scandir() on a descriptor, and
# stat() with follow_symlinks.
_use_fd_functions = (os.supports_dir_fd >=
                     {os.open, os.stat, os.unlink, os.rmdir} and
                     os.scandir in os.supports_fd and
                     os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000449
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    `path` itself must not be a symbolic link; that case is reported as an
    OSError through the same error handling.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so handlers dispatching on `func` see the truth.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        try:
            if os.path.islink(path):
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, path, sys.exc_info())
            # can't continue even if onerror hook returns
            return
        return _rmtree_unsafe(path, onerror)
508
# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform: the attribute is true
# exactly when rmtree() takes its fd-based code path.
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000512
def _basename(path):
    """Return the last component of `path`, ignoring trailing separators.

    Unlike os.path.basename(), this yields the directory name for a path
    that ends in a separator.
    """
    separators = os.path.sep
    if os.path.altsep:
        separators += os.path.altsep
    trimmed = path.rstrip(separators)
    return os.path.basename(trimmed)
Christian Heimesada8c3b2008-03-18 18:26:33 +0000518
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Raises Error if the computed destination already exists inside a
    destination directory, or if a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as the docstring promises,
            # instead of falling out with None.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then delete.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000574
def _destinsrc(src, dst):
    """Return True if `dst` is `src` itself or lies somewhere below it.

    Both paths are made absolute and given a trailing separator first, so
    that "/a/bc" is not mistaken for something inside "/a/b".
    """
    sep = os.path.sep
    abs_src = os.path.abspath(src)
    abs_dst = os.path.abspath(dst)
    if not abs_src.endswith(sep):
        abs_src += sep
    if not abs_dst.endswith(sep):
        abs_dst += sep
    return abs_dst.startswith(abs_src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000583
def _get_gid(name):
    """Return the gid for group `name`, or None if it cannot be determined.

    None is returned when `name` is None, when getgrnam is unavailable, or
    when the lookup raises KeyError.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    # Index 2 of the group entry is the numeric id.
    return entry[2] if entry is not None else None
595
def _get_uid(name):
    """Return the uid for user `name`, or None if it cannot be determined.

    None is returned when `name` is None, when getpwnam is unavailable, or
    when the lookup raises KeyError.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    # Index 2 of the password entry is the numeric id.
    return entry[2] if entry is not None else None
607
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from everything under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None.  A
    method whose support module could not be imported is rejected just like
    an unknown value, with ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built.  If not provided, the current owner and
    group will be used.

    'dry_run' skips creating the directory and the archive, but the name is
    still computed and returned.  'logger', if given, receives info
    messages.  'verbose' is accepted but not used here.

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").

    Returns the output filename.
    """
    if compress is None:
        tar_compression = ''
    else:
        # (accepted 'compress' value, tarfile mode suffix, available?)
        methods = (
            ('gzip', 'gz', _ZLIB_SUPPORTED),
            ('bzip2', 'bz2', _BZ2_SUPPORTED),
            ('xz', 'xz', _LZMA_SUPPORTED),
        )
        for method, suffix, available in methods:
            if available and compress == method:
                tar_compression = suffix
                break
        else:
            raise ValueError("bad value for 'compress', or compression format not "
                             "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    compress_ext = '.' + tar_compression if compress else ''
    archive_name = base_name + '.tar' + compress_ext
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()

    return archive_name
672
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Its parent
    directory is created if it does not exist.  Directory entries are
    written as well as regular files; anything in a directory listing that
    is not a regular file is left out.

    'dry_run' skips every filesystem change but still returns the name.
    'logger', if given, receives info messages.  'verbose' is accepted but
    not used here.

    Returns the name of the output zip file.
    """
    import zipfile  # late import for breaking circular dependency

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if dry_run:
        return zip_filename

    def _add(zf, entry):
        # Store 'entry' under its own (normalized) path and report it.
        zf.write(entry, entry)
        if logger is not None:
            logger.info("adding '%s'", entry)

    with zipfile.ZipFile(zip_filename, "w",
                         compression=zipfile.ZIP_DEFLATED) as zf:
        top = os.path.normpath(base_dir)
        # The current directory itself gets no entry of its own.
        if top != os.curdir:
            _add(zf, top)
        for dirpath, dirnames, filenames in os.walk(base_dir):
            for name in sorted(dirnames):
                _add(zf, os.path.normpath(os.path.join(dirpath, name)))
            for name in filenames:
                entry = os.path.normpath(os.path.join(dirpath, name))
                if os.path.isfile(entry):
                    _add(zf, entry)

    return zip_filename
716
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

# Formats below are only offered when their compression module imported.
# "zip" depends on zlib because _make_zipfile() writes with ZIP_DEFLATED.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        zip=(_make_zipfile, [], "ZIP file"),
    )

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    )
733
def get_archive_formats():
    """Return a sorted list of the formats available for creating archives.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
743
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format.

    name is the name of the format.  function is the callable that will be
    used to create archives.  If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Registering a name that already exists replaces the previous entry.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    Nothing is registered when validation fails.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple: a bare tuple on the right-hand side of '%'
        # would be unpacked as the argument list and garble the message.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
764
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no format with that name is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
767
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive.  By default,
    uses the current owner and group.  They are passed to every format
    except "zip".

    'dry_run' and 'logger' are forwarded to the format's function;
    'verbose' is accepted but not used here.

    Raises ValueError if 'format' is not a registered archive format; the
    working directory is left untouched in that case.
    """
    # Resolve the format before changing directory: the lookup is not
    # covered by the try/finally below, so failing after the chdir would
    # leave the process stranded in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % (format,)) from None

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = None
    if root_dir is not None:
        # Only query the current directory when it has to be restored.
        save_cwd = os.getcwd()
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Pin the output location before the working directory moves.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000821
822
def get_unpack_formats():
    """Return a sorted list of the formats available for unpacking.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
833
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of 'extensions' is already claimed by a
    registered format, and TypeError if 'function' is not callable.
    'extra_args' is accepted but not inspected here.
    """
    # Map every extension already in the registry to the format owning it.
    owner_of = {ext: name
                for name, info in _UNPACK_FORMATS.items()
                for ext in info[0]}

    for extension in extensions:
        if extension in owner_of:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owner_of[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
850
851
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format.  `extensions` is a list of filename
    extensions corresponding to the format.

    `function` is the callable that will be used to unpack archives.  The
    callable will receive archives to unpack.  If it's unable to handle an
    archive, it needs to raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as arguments to the callable.

    `description` can be provided to describe the format, and will be
    returned by the get_unpack_formats() function.

    The arguments are checked by _check_unpack_options() before anything is
    stored in the registry.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
873
def unregister_unpack_format(name):
    """Remove the unpack format 'name' from the registry.

    Raises KeyError if no format with that name is registered.
    """
    _UNPACK_FORMATS.pop(name)
877
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created as needed.  When `path`
    has no directory component there is nothing to create and the call is
    a no-op.  `path` itself is never created.
    """
    dirname = os.path.dirname(path)
    # An empty dirname means "the current directory"; os.makedirs('')
    # would raise FileNotFoundError.
    if dirname:
        # exist_ok avoids failing when the directory already exists or is
        # created by someone else between a check and the creation.
        os.makedirs(dirname, exist_ok=True)
883
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name starts with '/' or contains '..' anywhere are
    skipped.  Raises ReadError if `filename` is not a zip file.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            member = info.filename

            # don't extract absolute paths or ones with .. in them
            if member.startswith('/') or '..' in member:
                continue

            target = os.path.join(extract_dir, *member.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member.endswith('/'):
                # Directory entry: creating the directory was all there
                # was to do.
                continue

            # Read the member before opening the target, so that a failed
            # read does not leave an empty file behind.
            data = archive.read(member)
            with open(target, 'wb') as out:
                out.write(data)
            del data
917
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`.

    Raises ReadError if `filename` cannot be opened as a tar archive.
    """
    import tarfile  # late import for breaking circular dependency

    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    # NOTE(review): every member is extracted as-is, with no filtering of
    # member paths done here -- confirm callers only pass trusted archives.
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
931
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# Compressed tar variants are only offered when the matching compression
# module could be imported.
if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update(
        gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    )

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
               "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update(
        xztar=(['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file"),
    )
948
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format with an
    extension that `filename` ends with, or None if there is none.
    """
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
955
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked.  If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided,
    unpack_archive will use the filename extension and see if an unpacker
    was registered for that extension.

    Raises ValueError if an explicit `format` is not registered, and
    ReadError if no format was given and none matches the extension of
    `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    # Registry entries are (extensions, function, extra_args, description).
    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200994
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200995
# disk_usage() is only defined, and only exported, on platforms that offer
# a way to query the filesystem: os.statvfs() where it exists, otherwise
# the nt module on Windows.
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        # 'free' is based on f_bavail while 'used' is based on f_bfree, so
        # the two need not add up to 'total'.
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit,
        )

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +02001031
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001032
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that
    case, they are converted to their respective uid/gid.  An argument left
    as None keeps the corresponding id unchanged.

    Raises ValueError if neither user nor group is given, and LookupError
    if a name cannot be resolved to an id.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # user can either be an int (the uid) or a string (the system
        # username)
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001063
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked.  If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used.  Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    def _from_env(variable):
        # 0 stands for "not usable": missing or not an integer.
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_env('COLUMNS')
    lines = _from_env('LINES')

    # Both dimensions came from the environment: no need to query.
    if columns > 0 and lines > 0:
        return os.terminal_size((columns, lines))

    try:
        size = os.get_terminal_size(sys.__stdout__.fileno())
    except (AttributeError, ValueError, OSError):
        # stdout is None, closed, detached, or not a terminal, or
        # os.get_terminal_size() is unsupported
        size = os.terminal_size(fallback)

    if columns <= 0:
        columns = size.columns
    if lines <= 0:
        lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001108
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Given a command, mode, and a PATH string, return the path which
    conforms to the given mode on the PATH, or None if there is no such
    file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    A `cmd` that contains a directory part is checked directly and PATH
    is not searched.
    """
    def _is_usable(candidate):
        # The file must exist, be accessible with `mode`, and not be a
        # directory (on Windows directories pass the os.access check).
        if not os.path.exists(candidate):
            return False
        if not os.access(candidate, mode):
            return False
        return not os.path.isdir(candidate)

    # If we're given a path with a directory part, look it up directly
    # rather than referring to PATH directories.  This includes checking
    # relative to the current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _is_usable(cmd) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # If the command already ends with one of the known extensions
        # (e.g. "python.exe"), only that exact name is tried; otherwise
        # every extension is appended in turn.
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # Same directory listed more than once: search it only once.
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _is_usable(full_name):
                return full_name
    return None