blob: 4c6fdd7d33d49caa431bbc1cac97fd5e10d503ec [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020013
14try:
15 import zlib
16 del zlib
17 _ZLIB_SUPPORTED = True
18except ImportError:
19 _ZLIB_SUPPORTED = False
Tarek Ziadé396fad72010-02-23 05:30:31 +000020
21try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000022 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010023 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000024 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040025except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000026 _BZ2_SUPPORTED = False
27
28try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030029 import lzma
30 del lzma
31 _LZMA_SUPPORTED = True
32except ImportError:
33 _LZMA_SUPPORTED = False
34
35try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000036 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040037except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000038 getpwnam = None
39
40try:
41 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040042except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000043 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000044
# Public API of this module.
# disk_usage is added later, if available on the platform
__all__ = [
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    "move",
    "rmtree",
    "Error",
    "SpecialFileError",
    "ExecError",
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    "get_unpack_formats",
    "register_unpack_format",
    "unregister_unpack_format",
    "unpack_archive",
    "ignore_patterns",
    "chown",
    "which",
    "get_terminal_size",
    "SameFileError",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000054
class Error(OSError):
    """Base error raised by the copy and move helpers in this module.

    copytree() raises it with a list of collected failures as its single
    argument; SameFileError derives from it.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000057
class SameFileError(Error):
    """Raised when the source and the destination of a copy refer to the
    same file."""
60
class SpecialFileError(OSError):
    """Raised when an operation (such as copying) is attempted on a special
    file (such as a named pipe) that does not support it."""
64
class ExecError(OSError):
    """Raised when a command cannot be executed."""
67
class ReadError(OSError):
    """Raised when an archive cannot be read."""
70
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking registries
    fails."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000074
75
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is moved in pieces obtained from fsrc.read(length); copying stops
    as soon as a read returns an empty (falsy) result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
83
def _samefile(src, dst):
    """Return whether src and dst name the same file.

    Uses os.path.samefile() where it exists (treating an OSError from it
    as "not the same"); otherwise compares the normalized absolute paths.
    """
    if not hasattr(os.path, 'samefile'):
        # All other platforms: check for same pathname.
        src_key = os.path.normcase(os.path.abspath(src))
        dst_key = os.path.normcase(os.path.abspath(dst))
        return src_key == dst_key

    # Macintosh, Unix.
    try:
        return os.path.samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000095
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the contents of src to dst and return dst.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either existing path is a named pipe.

    If follow_symlinks is false and src is a symbolic link, dst is created
    as a new symlink with the same target instead of a copy of the file
    the link points to.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            path_stat = os.stat(path)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(path_stat.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if not follow_symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000124
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    When follow_symlinks is false and both src and dst are symlinks, the
    mode of the link itself is copied using os.lchmod; if os.lchmod is not
    available (e.g. Linux) nothing is done.  In every other case os.stat
    and os.chmod are used, and nothing is done if os.chmod is missing.

    """
    on_links = (not follow_symlinks and os.path.islink(src)
                and os.path.islink(dst))
    if on_links:
        chmod_name, stat_func = 'lchmod', os.lstat
    else:
        chmod_name, stat_func = 'chmod', os.stat

    chmod_func = getattr(os, chmod_name, None)
    if chmod_func is None:
        return

    chmod_func(dst, stat.S_IMODE(stat_func(src).st_mode))
145
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Attributes already present on `dst` are overwritten.  ENOTSUP and
        ENODATA from listing, and EPERM, ENOTSUP and ENODATA from copying
        a single attribute, are silently ignored.

        If `follow_symlinks` is false, symlinks won't be followed.

        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise

        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                if exc.errno in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
                    continue
                raise
else:
    def _copyxattr(*args, **kwargs):
        """Do nothing: os.listxattr is not available on this platform."""
172
def copystat(src, dst, *, follow_symlinks=True):
    """Copy file metadata from `src` to `dst`.

    The permission bits, last access time, last modification time, and
    flags are copied.  Where os.listxattr exists, "extended attributes"
    are copied as well, where possible.  The file contents, owner, and
    group are unaffected.  `src` and `dst` are path names given as strings.

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.
    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Operate on the links themselves only when the caller asked for it
    # *and* both paths are symlinks; otherwise follow them.
    follow = (follow_symlinks
              or not os.path.islink(src)
              or not os.path.islink(dst))

    def lookup(name):
        # Use the real os function when it exists; when symlinks must not
        # be followed it additionally has to support follow_symlinks.
        func = getattr(os, name, _nop)
        if follow or func in os.supports_follow_symlinks:
            return func
        return _nop

    src_stat = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(src_stat.st_mode)
    lookup("utime")(dst, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns),
                    follow_symlinks=follow)
    # We must copy extended attributes before the file is (potentially)
    # chmod()'ed read-only, otherwise setxattr() will error with -EACCES.
    _copyxattr(src, dst, follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod could not honour follow_symlinks=False here -- presumably
        # the platform offers no way to change the mode of a symlink
        # itself.  We are out of options, so give up and suppress the
        # error (which is what shutil always did in this circumstance).
        pass
    if hasattr(src_stat, 'st_flags'):
        try:
            lookup("chflags")(dst, src_stat.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Not every filesystem supports flags; only those errno values
            # that exist on this platform are tolerated.
            unsupported = [getattr(errno, err_name)
                           for err_name in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, err_name)]
            if why.errno not in unsupported:
                raise
Antoine Pitrou424246f2012-05-12 19:02:01 +0200232
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    If dst is a directory, the file is copied into it under src's
    base name.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000250
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and metadata. Return the file's destination.

    Metadata is copied with copystat(). Please see the copystat function
    for more information.

    If dst is a directory, the file is copied into it under src's
    base name.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000268
def ignore_patterns(*patterns):
    """Build a callable suitable as the copytree() `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        return {
            matched
            for pattern in patterns
            for matched in fnmatch.filter(names, pattern)
        }
    return _ignore_patterns
280
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including subdirectories. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.
                    # os.path.exists() follows the link, so testing the
                    # link path itself resolves a relative target against
                    # the link's own directory rather than the current
                    # working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # Otherwise let the copy occur; copy_function will
                    # raise an error for a dangling link.
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Forward every option so subdirectories behave like the
                # top-level directory.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000369
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Delete the directory tree at `path` using path-based os calls.

    Every OSError is passed to onerror(func, path, exc_info) rather than
    raised here; if scanning `path` fails it is treated as empty and its
    removal is still attempted.
    """
    try:
        with os.scandir(path) as scandir_it:
            children = list(scandir_it)
    except OSError:
        onerror(os.scandir, path, sys.exc_info())
        children = []

    for child in children:
        child_path = child.path
        try:
            descend = child.is_dir(follow_symlinks=False)
        except OSError:
            descend = False

        if not descend:
            try:
                os.unlink(child_path)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            if child.is_symlink():
                # This can only happen if someone replaces
                # a directory with a symlink after the call to
                # os.scandir or entry.is_dir above.
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, child_path, sys.exc_info())
            continue
        _rmtree_unsafe(child_path, onerror)

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000404
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Delete everything inside the directory open as `topfd`.

    `path` is the name of that directory and is only used to build the
    paths reported to onerror(func, path, exc_info).  The directory
    itself is not removed.
    """
    try:
        with os.scandir(topfd) as scandir_it:
            children = list(scandir_it)
    except OSError as err:
        err.filename = path
        onerror(os.scandir, path, sys.exc_info())
        return

    for child in children:
        child_path = os.path.join(path, child.name)
        try:
            descend = child.is_dir(follow_symlinks=False)
            if descend:
                expected_st = child.stat(follow_symlinks=False)
                descend = stat.S_ISDIR(expected_st.st_mode)
        except OSError:
            descend = False

        if not descend:
            try:
                os.unlink(child.name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            child_fd = os.open(child.name, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, child_path, sys.exc_info())
            continue

        try:
            # Recurse only if what we opened is still the directory that
            # was stat'ed above.
            if os.path.samestat(expected_st, os.fstat(child_fd)):
                _rmtree_safe_fd(child_fd, child_path, onerror)
                try:
                    os.rmdir(child.name, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, child_path, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # os.scandir or stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, child_path, sys.exc_info())
        finally:
            os.close(child_fd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200452
# The fd-based rmtree needs dir_fd support in os.open, os.stat, os.unlink
# and os.rmdir, fd support in os.scandir, and follow_symlinks support in
# os.stat.
_use_fd_functions = (
    os.supports_dir_fd >= {os.open, os.stat, os.unlink, os.rmdir}
    and os.scandir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000457
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block, so a bare raise
            # re-raises the exception currently being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so onerror handlers can tell the two apart.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        try:
            if os.path.islink(path):
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, path, sys.exc_info())
            # can't continue even if onerror hook returns
            return
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000520
def _basename(path):
    """Return the last component of `path`, ignoring trailing separators.

    Unlike os.path.basename(), this also yields the final component for a
    directory path that ends with a separator.
    """
    separators = os.path.sep
    if os.path.altsep:
        separators += os.path.altsep
    trimmed = path.rstrip(separators)
    return os.path.basename(trimmed)
Christian Heimesada8c3b2008-03-18 18:26:33 +0000526
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Raises Error if the destination path inside an existing directory
    already exists, or if a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as documented, instead of
            # falling out with None.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): fall back to
        # copy-then-delete, recreating symlinks rather than following them.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000582
def _destinsrc(src, dst):
    """Return True if the absolute path of `dst` is `src` itself or lies
    underneath it, comparing both with a trailing separator appended."""
    def _as_dir_prefix(path):
        absolute = os.path.abspath(path)
        if absolute.endswith(os.path.sep):
            return absolute
        return absolute + os.path.sep

    src_prefix = _as_dir_prefix(src)
    dst_prefix = _as_dir_prefix(dst)
    return dst_prefix.startswith(src_prefix)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000591
def _get_gid(name):
    """Return the gid for the group called `name`.

    None is returned when `name` is None, when getgrnam is unavailable, or
    when no such group exists.
    """
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
603
def _get_uid(name):
    """Return the uid for the user called `name`.

    None is returned when `name` is None, when getpwnam is unavailable, or
    when no such user exists.
    """
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
615
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a tar archive, optionally compressed, of the tree at 'base_dir'.

    'compress' selects the compression: "gzip" (the default), "bzip2",
    "xz", or None for a plain tar file.  ValueError is raised for any other
    value, and also when support for the requested compression is not
    available.

    'owner' and 'group' are the user and group names to record for every
    archive member.  A name that is None, or that cannot be resolved to a
    numeric id, leaves the corresponding fields of the members untouched.

    When 'dry_run' is true nothing is created on disk, but the archive name
    is still computed and returned.  'logger', if given, receives info()
    messages about the steps taken.  'verbose' is accepted but not used.

    The archive is written to 'base_name' + ".tar", plus the compression
    extension (".gz", ".bz2" or ".xz") when compressing.  Returns the name
    of that file.
    """
    if compress is None:
        tar_compression = ''
    else:
        # (support flag, accepted 'compress' value, tarfile mode suffix)
        candidates = (
            (_ZLIB_SUPPORTED, 'gzip', 'gz'),
            (_BZ2_SUPPORTED, 'bzip2', 'bz2'),
            (_LZMA_SUPPORTED, 'xz', 'xz'),
        )
        for available, label, suffix in candidates:
            if available and compress == label:
                tar_compression = suffix
                break
        else:
            raise ValueError("bad value for 'compress', or compression format not "
                             "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    archive_name = base_name + '.tar'
    if compress:
        archive_name += '.' + tar_compression

    parent = os.path.dirname(archive_name)
    if parent and not os.path.exists(parent):
        if logger is not None:
            logger.info("creating %s", parent)
        if not dry_run:
            os.makedirs(parent)

    if logger is not None:
        logger.info('Creating tar archive')

    # Resolve the names once; None means "leave the member as it is".
    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _apply_ownership(tarinfo):
        # tar.add() filter: stamp the requested owner/group on each member.
        if gid is not None:
            tarinfo.gid, tarinfo.gname = gid, group
        if uid is not None:
            tarinfo.uid, tarinfo.uname = uid, owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_apply_ownership)
    finally:
        tar.close()

    return archive_name
680
Tarek Ziadé396fad72010-02-23 05:30:31 +0000681def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
682 """Create a zip file from all the files under 'base_dir'.
683
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200684 The output zip file will be named 'base_name' + ".zip". Returns the
685 name of the output zip file.
Tarek Ziadé396fad72010-02-23 05:30:31 +0000686 """
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200687 import zipfile # late import for breaking circular dependency
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400688
Tarek Ziadé396fad72010-02-23 05:30:31 +0000689 zip_filename = base_name + ".zip"
690 archive_dir = os.path.dirname(base_name)
691
Serhiy Storchaka9a4fc192014-11-28 00:48:46 +0200692 if archive_dir and not os.path.exists(archive_dir):
Tarek Ziadé396fad72010-02-23 05:30:31 +0000693 if logger is not None:
694 logger.info("creating %s", archive_dir)
695 if not dry_run:
696 os.makedirs(archive_dir)
697
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400698 if logger is not None:
699 logger.info("creating '%s' and adding '%s' to it",
700 zip_filename, base_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000701
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400702 if not dry_run:
703 with zipfile.ZipFile(zip_filename, "w",
704 compression=zipfile.ZIP_DEFLATED) as zf:
Serhiy Storchakad941d7a2015-09-08 05:51:00 +0300705 path = os.path.normpath(base_dir)
Serhiy Storchaka666de772016-10-23 15:55:09 +0300706 if path != os.curdir:
707 zf.write(path, path)
708 if logger is not None:
709 logger.info("adding '%s'", path)
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400710 for dirpath, dirnames, filenames in os.walk(base_dir):
Serhiy Storchakad941d7a2015-09-08 05:51:00 +0300711 for name in sorted(dirnames):
712 path = os.path.normpath(os.path.join(dirpath, name))
713 zf.write(path, path)
714 if logger is not None:
715 logger.info("adding '%s'", path)
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400716 for name in filenames:
717 path = os.path.normpath(os.path.join(dirpath, name))
718 if os.path.isfile(path):
719 zf.write(path, path)
720 if logger is not None:
721 logger.info("adding '%s'", path)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000722
723 return zip_filename
724
# Registry of archive creators, keyed by format name.  Every value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

# The formats below are registered only when the module implementing
# their compression could be imported.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        'zip': (_make_zipfile, [], "ZIP file"),
    })

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    })

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'xztar': (_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    })
741
def get_archive_formats():
    """Return the formats currently registered for creating archives.

    The result is a list of (name, description) tuples, sorted in
    ascending order.
    """
    return sorted((name, info[2]) for name, info in _ARCHIVE_FORMATS.items())
751
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format.

    name is the name of the format.  function is the callable that will be
    used to create archives.  If provided, extra_args is a list or tuple of
    (name, value) pairs that will be passed as keyword arguments to the
    callable.  description can be provided to describe the format, and is
    returned by the get_archive_formats() function.

    Registering a name that already exists replaces the earlier entry.

    Raises TypeError if function is not callable, if extra_args is neither
    a list nor a tuple, or if one of its elements is not a two-item list or
    tuple.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap the value in a 1-tuple: a bare tuple on the right-hand side
        # of % would be unpacked as the format arguments, garbling the
        # message or failing with an unrelated formatting error.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
772
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _ARCHIVE_FORMATS.pop(name)
775
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to the archiver of every format except
    "zip"; the tar archivers use them for the archive members.  By default,
    the current owner and group are used.

    With a true 'dry_run' the working directory is not changed; the flag is
    also forwarded to the archiver.  'logger', if given, receives debug()
    messages about directory changes and is forwarded as well.  'verbose'
    is accepted but not used.

    Raises ValueError if 'format' is not a registered archive format; this
    is detected before the working directory is touched.
    """
    # Resolve the format first.  Doing it after the chdir() below, outside
    # the try/finally, would leave the process inside 'root_dir' whenever
    # the format is unknown.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % (format,)) from None

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make the target absolute before leaving the current directory,
        # otherwise a relative name would end up relative to 'root_dir'.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000829
830
def get_unpack_formats():
    """Return the formats currently registered for unpacking.

    The result is a list of (name, extensions, description) tuples, sorted
    in ascending order.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
841
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered unpack format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not examined.
    """
    # Map every extension that is already taken to the format owning it.
    owners = {ext: fmt_name
              for fmt_name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension in owners:
            raise RegistryError('%s is already registered for "%s"'
                                % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
858
859
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format.  `extensions` is a list of the file
    name extensions that identify the format.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack.  If it is unable to handle an archive, it needs to
    raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    are passed to the callable as keyword arguments.  `description` can be
    provided to describe the format, and is returned by the
    get_unpack_formats() function.

    The registration is validated by _check_unpack_options() before it is
    stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
881
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
885
886def _ensure_directory(path):
887 """Ensure that the parent directory of `path` exists"""
888 dirname = os.path.dirname(path)
889 if not os.path.isdir(dirname):
890 os.makedirs(dirname)
891
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` into the directory `extract_dir`.

    Members whose name is absolute or contains ".." are skipped.

    Raises ReadError if `filename` is not a zip file.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            member = info.filename

            # don't extract absolute paths or ones with .. in them
            if member.startswith('/') or '..' in member:
                continue

            # member names use '/' whatever the platform
            target = os.path.join(extract_dir, *member.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member.endswith('/'):
                # a directory entry: there is no file content to write
                continue

            # Read the member before opening the output file, so a failed
            # read does not leave an empty file behind.
            payload = archive.read(member)
            with open(target, 'wb') as out:
                out.write(payload)
925
926def _unpack_tarfile(filename, extract_dir):
Serhiy Storchaka11213772014-08-06 18:50:19 +0300927 """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000928 """
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200929 import tarfile # late import for breaking circular dependency
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000930 try:
931 tarobj = tarfile.open(filename)
932 except tarfile.TarError:
933 raise ReadError(
934 "%s is not a compressed or uncompressed tar file" % filename)
935 try:
936 tarobj.extractall(extract_dir)
937 finally:
938 tarobj.close()
939
# Registry of unpackers, keyed by format name.  Every value is an
# (extensions, function, extra_args, description) tuple, where extra_args
# is a list of (name, value) pairs passed to the function as keyword
# arguments.
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# The compressed tar formats are registered only when the module
# implementing their compression could be imported.
if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update({
        'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [],
                  "gzip'ed tar-file"),
    })

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update({
        'bztar': (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                  "bzip2'ed tar-file"),
    })

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update({
        'xztar': (['.tar.xz', '.txz'], _unpack_tarfile, [],
                  "xz'ed tar-file"),
    })
956
def _find_unpack_format(filename):
    """Return the name of the unpack format matching `filename`.

    The registered formats are scanned in registry order and the first one
    with an extension that `filename` ends with wins.  Returns None when
    nothing matches.
    """
    for fmt_name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in info[0]):
            return fmt_name
    return None
963
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked.  If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided,
    unpack_archive will use the filename extension and see if an unpacker
    was registered for that extension.

    Both `filename` and `extract_dir` are converted with os.fspath().

    Raises ValueError if an explicitly given `format` is not registered,
    and ReadError if no `format` is given and no registered unpacker
    matches the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    # entry layout: (extensions, function, extra_args, description)
    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +02001002
Éric Araujoe4d5b8e2011-08-08 16:51:11 +02001003
# disk_usage() is only defined, and only added to __all__, on platforms
# offering one of the two ways to query a filesystem used below.
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage',
                                               ['total', 'used', 'free'])
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', which are the amounts of total, used and free space, in
        bytes.  'free' is computed from f_bavail, whereas 'used' is derived
        from f_bfree.
        """
        vfs = os.statvfs(path)
        frsize = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * frsize,
            used=(vfs.f_blocks - vfs.f_bfree) * frsize,
            free=vfs.f_bavail * frsize,
        )

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage',
                                               ['total', 'used', 'free'])

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', which are the amounts of total, used and free space, in
        bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +02001039
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001040
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    user may be a uid or a user name, group a gid or a group name; names
    are converted to the corresponding uid/gid.  An argument left as None
    keeps that part of the ownership unchanged.

    Raises ValueError if neither user nor group is given, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system user name; anything else is passed as is
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # anything that is not an int is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001071
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.  The fallback is also used for
    a dimension that the terminal reports as 0.

    The value returned is a named tuple of type os.terminal_size.
    """
    def _from_environment(variable):
        # Integer value of the environment variable, or 0 when it is
        # missing or not an integer.
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_environment('COLUMNS')
    lines = _from_environment('LINES')

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # stdout is None, closed, detached, or not a terminal, or
            # os.get_terminal_size() is unsupported
            size = os.terminal_size(fallback)
        # A successful query can still report a dimension of 0; a zero
        # width or height is of no use to callers, so the fallback takes
        # over for that dimension.
        if columns <= 0:
            columns = size.columns or fallback[0]
        if lines <= 0:
            lines = size.lines or fallback[1]

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001116
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Return the path of the file that `cmd` resolves to, or None.

    The directories of `path`, a string in the format of the PATH
    environment variable, are searched for a file named `cmd` that can be
    accessed with `mode`; directories never qualify.  A `cmd` that contains
    a directory part is checked as given, without any search.

    `mode` defaults to os.F_OK | os.X_OK.  `path` defaults to the PATH
    environment variable; when that variable is not set at all, the
    "CS_PATH" configuration string, or failing that os.defpath, is used.

    On Windows the current directory is searched first and the extensions
    listed in PATHEXT are taken into account.
    """
    def _usable(candidate):
        # Check that the file can be accessed with the correct mode.
        # Directories are rejected explicitly, as on Windows they pass
        # the os.access check.
        return (os.path.exists(candidate) and os.access(candidate, mode)
                and not os.path.isdir(candidate))

    # If we're given a path with a directory part, look it up directly rather
    # than referring to PATH directories. This includes checking relative to
    # the current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _usable(cmd) else None

    if path is None:
        path = os.environ.get("PATH", None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # See if the given file matches any of the expected path extensions.
        # This will allow us to short circuit when given "python.exe".
        # If it does match, only test that one, otherwise we have to try
        # others.
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        # a directory listed more than once is only searched the first time
        key = os.path.normcase(directory)
        if key in visited:
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name):
                return full_name
    return None