blob: ab1a7d61897ac01a3f6a29e525d2d418a5369d3f [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020013
# Probe the optional compression modules.  Only their availability matters
# here, so a successfully imported module is dropped again right away.
try:
    import zlib
except ImportError:
    _ZLIB_SUPPORTED = False
else:
    del zlib
    _ZLIB_SUPPORTED = True

try:
    import bz2
except ImportError:
    _BZ2_SUPPORTED = False
else:
    del bz2
    _BZ2_SUPPORTED = True

try:
    import lzma
except ImportError:
    _LZMA_SUPPORTED = False
else:
    del lzma
    _LZMA_SUPPORTED = True

# The user/group database lookups may be missing; callers get None then.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000044
# Platform detection and tuning values used by the copy helpers.
_WINDOWS = (os.name == 'nt')

# Bind the platform-specific low-level module when there is one; the other
# name (or both, on any other platform) stays None.
posix = None
nt = None
if _WINDOWS:
    import nt
elif os.name == 'posix':
    import posix

# Chunk size used by the read()/write() based copy loops.
if _WINDOWS:
    COPY_BUFSIZE = 1024 * 1024
else:
    COPY_BUFSIZE = 64 * 1024

# sendfile() is only used for file copies on Linux.
_USE_CP_SENDFILE = sys.platform.startswith("linux") and hasattr(os, "sendfile")
# Truthy only when the posix module exposes _fcopyfile (macOS).
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")
Giampaolo Rodola4a172cc2018-06-12 23:04:50 +020055
# Public API of this module.
# disk_usage is added later, if available on the platform
__all__ = [
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    "move",
    "rmtree",
    "Error",
    "SpecialFileError",
    "ExecError",
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    "get_unpack_formats",
    "register_unpack_format",
    "unregister_unpack_format",
    "unpack_archive",
    "ignore_patterns",
    "chown",
    "which",
    "get_terminal_size",
    "SameFileError",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000065
class Error(OSError):
    """Error raised by operations in this module; a subclass of OSError."""
Guido van Rossumc6360141990-10-13 19:23:40 +000068
class SameFileError(Error):
    """Signals that the source and the destination are one and the same file."""
71
class SpecialFileError(OSError):
    """Signals an operation (copying, for instance) that a special file,
    such as a named pipe, does not support."""
75
class ExecError(OSError):
    """Signals that executing a command was not possible."""
78
class ReadError(OSError):
    """Signals that reading an archive was not possible."""
81
class RegistryError(Exception):
    """Signals a failed operation on the registries of archiving and
    unpacking formats."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000085
class _GiveupOnFastCopy(Exception):
    """Internal signal: a fast-copy function could not do the job, so the
    caller should fall back on a plain read()/write() file copy.
    """
Tarek Ziadé6ac91722010-04-28 17:51:36 +000090
def _fastcopy_fcopyfile(fsrc, fdst, flags):
    """Copy the content or metadata of a regular file through the
    high-performance fcopyfile(3) syscall (macOS).

    *fsrc* and *fdst* are open file objects; *flags* is passed through to
    posix._fcopyfile().  _GiveupOnFastCopy is raised when a file object has
    no usable descriptor or when the syscall fails with EINVAL/ENOTSUP;
    any other OSError is re-raised with both file names attached.
    """
    try:
        src_fd, dst_fd = fsrc.fileno(), fdst.fileno()
    except Exception as exc:
        # not a regular file
        raise _GiveupOnFastCopy(exc)

    try:
        posix._fcopyfile(src_fd, dst_fd, flags)
    except OSError as exc:
        exc.filename = fsrc.name
        exc.filename2 = fdst.name
        if exc.errno not in (errno.EINVAL, errno.ENOTSUP):
            raise exc from None
        raise _GiveupOnFastCopy(exc)
110
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 only.

    *fsrc* and *fdst* are open file objects.  _GiveupOnFastCopy is raised
    when a file object has no usable descriptor, when sendfile() reports
    ENOTSOCK (the module-level _USE_CP_SENDFILE flag is switched off in
    that case), or when the very first call fails before any data was
    written.  Other OSErrors propagate with both file names attached.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _USE_CP_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MB
    except Exception:
        blocksize = 2 ** 27  # 128MB
    # On 32-bit builds a count above what a C ssize_t can hold makes
    # os.sendfile() raise OverflowError, so cap the request at 1GiB there.
    # The loop below keeps calling sendfile() until EOF, so a smaller
    # block size does not affect the result.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _USE_CP_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
170
def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
    """readinto()/memoryview() based variant of copyfileobj().
    *fsrc* must support readinto() method and both files must be
    open in binary mode.

    Data is read into one reusable buffer of *length* bytes and written
    to *fdst* until readinto() reports that nothing was read.
    """
    # Localize variable access to minimize overhead.
    fsrc_readinto = fsrc.readinto
    fdst_write = fdst.write
    with memoryview(bytearray(length)) as mv:
        while True:
            n = fsrc_readinto(mv)
            if not n:
                break
            elif n < length:
                # Short read: only the first n bytes of the buffer are
                # valid, so write a zero-copy slice of it.
                with mv[:n] as smv:
                    fdst_write(smv)
            else:
                fdst_write(mv)
189
def copyfileobj(fsrc, fdst, length=0):
    """Copy the data of file-like object fsrc to file-like object fdst.

    The data is moved in chunks of *length*; a false *length* (the
    default) selects COPY_BUFSIZE.
    """
    chunk_size = length or COPY_BUFSIZE
    # Bind the methods once so the loop avoids repeated attribute lookups.
    read = fsrc.read
    write = fdst.write
    chunk = read(chunk_size)
    while chunk:
        write(chunk)
        chunk = read(chunk_size)
Greg Stein42bb8b32000-07-12 09:55:30 +0000202
def _samefile(src, dst):
    """Tell whether src and dst refer to the same file.

    An os.DirEntry *src* is compared through its own stat() result when
    os.path.samestat exists; otherwise os.path.samefile is used when
    available.  An OSError during either comparison yields False.  Without
    those helpers the normalized absolute path names are compared.
    """
    # Macintosh, Unix.
    if isinstance(src, os.DirEntry) and hasattr(os.path, 'samestat'):
        try:
            src_st = src.stat()
            dst_st = os.stat(dst)
            return os.path.samestat(src_st, dst_st)
        except OSError:
            return False

    if hasattr(os.path, 'samefile'):
        try:
            same = os.path.samefile(src, dst)
        except OSError:
            return False
        return same

    # All other platforms: check for same pathname.
    def _canonical(name):
        return os.path.normcase(os.path.abspath(name))

    return _canonical(src) == _canonical(dst)
Tim Peters495ad3c2001-01-15 01:36:40 +0000220
def _stat(fn):
    """Return the stat result of *fn*, asking the entry itself when *fn*
    is an os.DirEntry and os.stat() otherwise."""
    if isinstance(fn, os.DirEntry):
        return fn.stat()
    return os.stat(fn)
223
def _islink(fn):
    """Tell whether *fn* is a symbolic link, asking the entry itself when
    *fn* is an os.DirEntry and os.path.islink() otherwise."""
    if isinstance(fn, os.DirEntry):
        return fn.is_symlink()
    return os.path.islink(fn)
226
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the data of src to dst in the most efficient way available
    and return dst.

    When follow_symlinks is false and src is a symbolic link, dst becomes
    a new symlink with the same target instead of a copy of the file the
    link points to.

    SameFileError is raised when src and dst are the same file and
    SpecialFileError when either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    src_size = 0
    for position, candidate in enumerate([src, dst]):
        try:
            st = _stat(candidate)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(st.st_mode):
            if isinstance(candidate, os.DirEntry):
                candidate = candidate.path
            raise SpecialFileError("`%s` is a named pipe" % candidate)
        # Only the size of the source is of interest, and only on Windows.
        if _WINDOWS and position == 0:
            src_size = st.st_size

    if not follow_symlinks and _islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        try:
            if _HAS_FCOPYFILE:
                # macOS
                _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA)
                return dst
            elif _USE_CP_SENDFILE:
                # Linux
                _fastcopy_sendfile(fsrc, fdst)
                return dst
        except _GiveupOnFastCopy:
            # The fast path declined; use the generic loop below.
            pass
        else:
            # Neither fast path applies on this platform.
            # Windows, see:
            # https://github.com/python/cpython/pull/7160#discussion_r195405230
            if _WINDOWS and src_size > 0:
                _copyfileobj_readinto(fsrc, fdst, min(src_size, COPY_BUFSIZE))
                return dst

        copyfileobj(fsrc, fdst)

    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000279
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the mode bits of src onto dst.

    When follow_symlinks is false, the links themselves are operated on,
    but only if both `src` and `dst` are symlinks.  In that case nothing
    is done at all when `lchmod` is not available (e.g. Linux).
    """
    on_links = not follow_symlinks and _islink(src) and os.path.islink(dst)
    if not on_links:
        read_stat, apply_mode = _stat, os.chmod
    elif hasattr(os, 'lchmod'):
        read_stat, apply_mode = os.lstat, os.lchmod
    else:
        return

    src_mode = read_stat(src).st_mode
    apply_mode(dst, stat.S_IMODE(src_mode))
298
if not hasattr(os, 'listxattr'):
    def _copyxattr(*args, **kwargs):
        """Do nothing: os.listxattr is not available here."""
        pass
else:
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy the extended filesystem attributes of `src` onto `dst`.

        Attributes already present on `dst` are overwritten.

        Symlinks are not followed when `follow_symlinks` is false.

        An OSError with errno ENOTSUP, ENODATA or EINVAL while listing the
        attributes ends the copy silently; while transferring a single
        attribute, EPERM is tolerated as well and the remaining attributes
        are still processed.
        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno in (errno.ENOTSUP, errno.ENODATA, errno.EINVAL):
                return
            raise

        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                tolerated = (errno.EPERM, errno.ENOTSUP, errno.ENODATA,
                             errno.EINVAL)
                if exc.errno not in tolerated:
                    raise
326
def copystat(src, dst, *, follow_symlinks=True):
    """Copy file metadata from `src` to `dst`.

    The permission bits, the last access and modification times and the
    flags are copied; extended attributes are handed to _copyxattr().  The
    file contents, owner, and group are unaffected.  `src` may be a path
    name or an os.DirEntry; `dst` is a path name.

    When the optional flag `follow_symlinks` is not set, the links
    themselves are operated on, but only if both `src` and `dst` are
    symlinks.
    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Follow symlinks unless the caller asked not to *and* both ends
    # really are symlinks.
    follow = (follow_symlinks
              or not _islink(src)
              or not os.path.islink(dst))

    def lookup(name):
        # Use the real os function if it exists; when links must not be
        # followed it additionally has to support follow_symlinks.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    if isinstance(src, os.DirEntry):
        st = src.stat(follow_symlinks=follow)
    else:
        st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)

    timestamps = (st.st_atime_ns, st.st_mtime_ns)
    lookup("utime")(dst, ns=timestamps, follow_symlinks=follow)

    # We must copy extended attributes before the file is (potentially)
    # chmod()'ed read-only, otherwise setxattr() will error with -EACCES.
    _copyxattr(src, dst, follow_symlinks=follow)

    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # os.chmod() could not change the mode of the symlink itself
        # (follow_symlinks=False is not implemented for it here), so
        # there is no way left to chmod the link.  Give up and suppress
        # the error, which is what shutil always did in this circumstance.
        pass

    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # An "operation not supported" failure is ignored.
            unsupported = {getattr(errno, code)
                           for code in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, code)}
            if why.errno not in unsupported:
                raise
Antoine Pitrou424246f2012-05-12 19:02:01 +0200389
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst") and return the path of the
    file that was written.

    When dst is a directory, the file is created inside it under the
    base name of src.

    With a false follow_symlinks, symlinks are not followed, much like
    GNU's "cp -P src dst".

    copyfile() raises SameFileError when source and destination are the
    same file.
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000407
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and metadata and return the path of the file that was
    written.

    The metadata is transferred by copystat(); see that function for the
    details.

    When dst is a directory, the file is created inside it under the
    base name of src.

    With a false follow_symlinks, symlinks are not followed, much like
    GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000424
def ignore_patterns(*patterns):
    """Build a callable suitable as the copytree() ignore parameter.

    *patterns* are glob-style patterns; the returned callable reports the
    set of names matching any of them, i.e. the files to exclude."""
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
436
def _copytree(entries, src, dst, symlinks, ignore, copy_function,
              ignore_dangling_symlinks, dirs_exist_ok=False):
    """Copy the directory entries of `src` into `dst` (worker of copytree()).

    `entries` is an iterable of os.DirEntry objects for `src`; the other
    arguments have the meaning documented on copytree().  `dst` is created
    with os.makedirs() and returned on success.  Failures of individual
    entries are collected and raised together at the end as a single Error
    whose argument is a list of (srcname, dstname, reason) tuples.
    """
    if ignore is not None:
        # Give the callback what copytree() documents: the directory as a
        # plain path (recursive calls may pass an os.DirEntry as `src`)
        # and the names as the list returned by os.listdir().
        ignored_names = ignore(os.fspath(src), os.listdir(src))
    else:
        ignored_names = set()

    os.makedirs(dst, exist_ok=dirs_exist_ok)
    errors = []
    # copy() and copy2() accept os.DirEntry objects, which saves them a
    # stat call; any other copy function is given plain path names.
    use_srcentry = copy_function is copy2 or copy_function is copy

    for srcentry in entries:
        if srcentry.name in ignored_names:
            continue
        srcname = os.path.join(src, srcentry.name)
        dstname = os.path.join(dst, srcentry.name)
        srcobj = srcentry if use_srcentry else srcname
        try:
            if srcentry.is_symlink():
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcobj, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  The link
                    # itself is tested (os.path.exists() follows it), because
                    # a relative target would otherwise be resolved against
                    # the current working directory instead of the directory
                    # that holds the link.
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    if srcentry.is_dir():
                        copytree(srcobj, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks,
                                 dirs_exist_ok=dirs_exist_ok)
                    else:
                        copy_function(srcobj, dstname)
            elif srcentry.is_dir():
                # Forward ignore_dangling_symlinks so that the flag also
                # applies below the top-level directory.
                copytree(srcobj, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks,
                         dirs_exist_ok=dirs_exist_ok)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcobj, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
494
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False, dirs_exist_ok=False):
    """Copy a whole directory tree recursively; return the destination
    directory.

    dirs_exist_ok decides whether an exception is raised in case dst or
    any missing parent directory already exists.

    Should exceptions occur, an Error carrying a list of reasons is
    raised.

    With a true symlinks flag, symbolic links in the source tree become
    symbolic links in the destination tree; with a false one, the contents
    of the files the links point to are copied.  A link whose target does
    not exist adds an entry to the list of errors reported through the
    Error exception at the end of the copy process.

    Set the optional ignore_dangling_symlinks flag to true to silence
    that exception.  Note that this has no effect on platforms that don't
    support os.symlink.

    The optional ignore argument is a callable.  When given, it is called
    with the `src` parameter, i.e. the directory being visited by
    copytree(), and `names`, the list of `src` contents as returned by
    os.listdir():

        callable(src, names) -> ignored_names

    Because copytree() recurses, the callable is invoked once per copied
    directory.  It returns a list of names, relative to the `src`
    directory, that should not be copied.

    The optional copy_function argument is the callable used to copy each
    file.  It is called with the source path and the destination path.
    copy2() is the default, but any function with the same signature
    (like copy()) can be used.

    """
    sys.audit("shutil.copytree", src, dst)
    with os.scandir(src) as scandir_it:
        copied_to = _copytree(
            entries=scandir_it,
            src=src,
            dst=dst,
            symlinks=symlinks,
            ignore=ignore,
            copy_function=copy_function,
            ignore_dangling_symlinks=ignore_dangling_symlinks,
            dirs_exist_ok=dirs_exist_ok,
        )
    return copied_to
Guido van Rossumd7673291998-02-06 21:38:09 +0000539
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree rooted at *path* using path-based calls.

    Every OSError is reported as onerror(func, path, exc_info) from within
    the handling except block, where func is the os function associated
    with the failing step.  Unless onerror raises, processing then goes on
    with the next entry.
    """
    try:
        with os.scandir(path) as scan:
            children = list(scan)
    except OSError:
        onerror(os.scandir, path, sys.exc_info())
        children = []

    for child in children:
        child_path = child.path
        try:
            descend = child.is_dir(follow_symlinks=False)
        except OSError:
            descend = False

        if not descend:
            # Anything that is not a real directory is simply unlinked.
            try:
                os.unlink(child_path)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            if child.is_symlink():
                # This can only happen if someone replaces
                # a directory with a symlink after the call to
                # os.scandir or entry.is_dir above.
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, child_path, sys.exc_info())
            continue
        _rmtree_unsafe(child_path, onerror)

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000574
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as descriptor *topfd*.

    *path* is the path name matching *topfd*; it is only used to build the
    names reported to *onerror*.  Every OSError is reported as
    onerror(func, fullname, exc_info) from within the handling except
    block.  The directory behind *topfd* itself is not removed here.
    """
    try:
        with os.scandir(topfd) as scandir_it:
            entries = list(scandir_it)
    except OSError as err:
        # scandir() was given a descriptor, so attach the path name.
        err.filename = path
        onerror(os.scandir, path, sys.exc_info())
        return
    for entry in entries:
        fullname = os.path.join(path, entry.name)
        try:
            is_dir = entry.is_dir(follow_symlinks=False)
        except OSError:
            is_dir = False
        else:
            if is_dir:
                # Record the lstat result now; it is compared with the
                # fstat of the opened descriptor further down.
                try:
                    orig_st = entry.stat(follow_symlinks=False)
                    is_dir = stat.S_ISDIR(orig_st.st_mode)
                except OSError:
                    onerror(os.lstat, fullname, sys.exc_info())
                    continue
        if is_dir:
            try:
                # Open relative to topfd rather than by full path name.
                dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
            except OSError:
                onerror(os.open, fullname, sys.exc_info())
            else:
                try:
                    # Only descend when the opened descriptor is the very
                    # directory that was stat'ed above.
                    if os.path.samestat(orig_st, os.fstat(dirfd)):
                        _rmtree_safe_fd(dirfd, fullname, onerror)
                        try:
                            os.rmdir(entry.name, dir_fd=topfd)
                        except OSError:
                            onerror(os.rmdir, fullname, sys.exc_info())
                    else:
                        try:
                            # This can only happen if someone replaces
                            # a directory with a symlink after the call to
                            # os.scandir or stat.S_ISDIR above.
                            raise OSError("Cannot call rmtree on a symbolic "
                                          "link")
                        except OSError:
                            onerror(os.path.islink, fullname, sys.exc_info())
                finally:
                    # Close the descriptor whatever happened above.
                    os.close(dirfd)
        else:
            try:
                os.unlink(entry.name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200627
# The fd-based rmtree implementation is only usable when os.open, os.stat,
# os.unlink and os.rmdir all accept dir_fd, os.scandir accepts a file
# descriptor, and os.stat honours follow_symlinks.
_use_fd_functions = (
    os.supports_dir_fd >= {os.open, os.stat, os.unlink, os.rmdir}
    and os.scandir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000632
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    sys.audit("shutil.rmtree", path)
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an ``except`` block below, so a
            # bare raise re-raises the exception being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so that onerror handlers can dispatch on it.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                # The raise/except pair exists only to give onerror a
                # populated sys.exc_info().
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        try:
            if os.path.islink(path):
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, path, sys.exc_info())
            # can't continue even if onerror hook returns
            return
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000696
def _basename(path):
    """Return the last component of *path*, ignoring trailing separators.

    Unlike os.path.basename(), trailing separators are stripped first, so
    the final component is returned even for directory paths such as
    "foo/bar/".  *path* may be a str, bytes or os.PathLike object.
    """
    path = os.fspath(path)
    sep = os.path.sep + (os.path.altsep or '')
    if isinstance(path, bytes):
        # bytes.rstrip() needs a bytes argument.
        sep = os.fsencode(sep)
    return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000702
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location, much like
    the Unix "mv" command, and return the final destination path.

    When dst is an existing directory (or a symlink to one), src is moved
    inside it; an Error is raised if an entry with src's base name already
    exists there.  When dst exists but is not a directory, whether it is
    overwritten depends on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError (typically for a
    cross-filesystem move), src is copied to the destination and then
    removed: symlinks are recreated under the new name, directories are
    copied with copytree() and files with copy_function.

    The optional `copy_function` argument is a callable used to copy the
    source, either directly or through copytree().  copy2() is the
    default; any function with the same signature (like copy()) works.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # The symlink test must come first: a symlink to a directory also
        # satisfies os.path.isdir().
        if os.path.islink(src):
            os.symlink(os.readlink(src), target)
            os.unlink(src)
        elif not os.path.isdir(src):
            copy_function(src, target)
            os.unlink(src)
        else:
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, target, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
    return target
Brett Cannon1c3fa182004-06-19 21:11:35 +0000758
def _destinsrc(src, dst):
    """Return True if the absolute path of dst is src itself or lies below it.

    Both paths get a trailing separator before the prefix comparison so
    that "a" is not mistaken for a parent of "ab".
    """
    sep = os.path.sep
    src_dir, dst_dir = (
        p if p.endswith(sep) else p + sep
        for p in (os.path.abspath(src), os.path.abspath(dst))
    )
    return dst_dir.startswith(src_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000767
def _get_gid(name):
    """Return the gid for the group *name*, or None.

    None is returned when *name* is None, when getgrnam is unavailable
    (set to None at import time), or when the lookup raises KeyError.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
779
def _get_uid(name):
    """Return the uid for the user *name*, or None.

    None is returned when *name* is None, when getpwnam is unavailable
    (set to None at import time), or when the lookup raises KeyError.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
791
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from everything under
    'base_dir' and return the name of the output file.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None; a
    ValueError is raised for any other value, or when the matching
    compression module is not available.

    'owner' and 'group' name the owner and group recorded for every
    archive member.  A name that is None or cannot be resolved to an id
    leaves the corresponding member fields untouched.

    The output file is named 'base_name' + ".tar", plus the appropriate
    compression extension (".gz", ".bz2", or ".xz") if any.  Its parent
    directory is created when missing.  With 'dry_run' set, nothing is
    written to disk.
    """
    if compress is None:
        tar_compression = ''
    else:
        # (accepted 'compress' value, tarfile mode suffix, codec available)
        codecs = (
            ('gzip', 'gz', _ZLIB_SUPPORTED),
            ('bzip2', 'bz2', _BZ2_SUPPORTED),
            ('xz', 'xz', _LZMA_SUPPORTED),
        )
        for codec_name, suffix, available in codecs:
            if available and compress == codec_name:
                tar_compression = suffix
                break
        else:
            raise ValueError("bad value for 'compress', or compression format not "
                             "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    compress_ext = '.' + tar_compression if compress else ''
    archive_name = base_name + '.tar' + compress_ext
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: stamp the requested owner/group on each member.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()

    return archive_name
856
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file is named 'base_name' + ".zip" and its name is
    returned.  The parent directory of 'base_name' is created when
    missing.  With 'dry_run' set, nothing is written to disk.
    """
    import zipfile  # late import for breaking circular dependency

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if dry_run:
        return zip_filename

    with zipfile.ZipFile(zip_filename, "w",
                         compression=zipfile.ZIP_DEFLATED) as zf:

        def _add(entry):
            # Store the entry under its own (normalized) path and log it.
            zf.write(entry, entry)
            if logger is not None:
                logger.info("adding '%s'", entry)

        root = os.path.normpath(base_dir)
        # The current directory itself gets no explicit entry.
        if root != os.curdir:
            _add(root)
        for dirpath, dirnames, filenames in os.walk(base_dir):
            # Directories get explicit entries, in sorted order.
            for name in sorted(dirnames):
                _add(os.path.normpath(os.path.join(dirpath, name)))
            for name in filenames:
                candidate = os.path.normpath(os.path.join(dirpath, name))
                # Only regular files are stored.
                if os.path.isfile(candidate):
                    _add(candidate)

    return zip_filename
900
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

# Both gzip'ed tarballs and ZIP files are only offered when zlib is present.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        'zip': (_make_zipfile, [], "ZIP file"),
    })

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    })

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'xztar': (_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    })
917
def get_archive_formats():
    """Return the formats registered for archiving.

    The result is a sorted list of (name, description) tuples.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
927
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format under *name*.

    function is the callable used to create archives.  extra_args, when
    given, must be a list or tuple of (name, value) pairs; they are passed
    to the callable as keyword arguments.  description is the text that
    get_archive_formats() reports for the format.

    Raises TypeError if function is not callable or extra_args does not
    have the expected shape.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    well_formed = all(
        isinstance(item, (tuple, list)) and len(item) == 2
        for item in extra_args
    )
    if not well_formed:
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
948
def unregister_archive_format(name):
    """Remove the archive format *name* from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _ARCHIVE_FORMATS.pop(name)
951
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format; the
    current working directory is left untouched in that case.
    """
    sys.audit("shutil.make_archive", base_name, format, root_dir, base_dir)

    # Resolve the format before changing directory: raising after the
    # chdir below would leave the process stranded in root_dir, because
    # the lookup is not covered by the try/finally that restores the cwd.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute while the original cwd is still current.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    # Every format other than 'zip' receives the owner/group arguments.
    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +00001006
1007
def get_unpack_formats():
    """Return the formats registered for unpacking.

    The result is a sorted list of (name, extensions, description) tuples.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
1018
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if any of *extensions* already belongs to a
    registered format, and TypeError if *function* is not callable.
    *extra_args* is accepted but not inspected.
    """
    # Map every already-registered extension to the format that owns it.
    owner_of = {
        ext: name
        for name, info in _UNPACK_FORMATS.items()
        for ext in info[0]
    }

    for extension in extensions:
        if extension in owner_of:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owner_of[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
1035
1036
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of filename extensions that identify the
    format.

    `function` is the callable used to unpack archives; it is called
    with the archive filename and the extraction directory.  If it cannot
    handle an archive, it needs to raise a ReadError exception.

    `extra_args`, when given, is a sequence of (name, value) tuples that
    are passed to the callable as keyword arguments.  `description` is
    the text that get_unpack_formats() reports for the format.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    entry = (extensions, function, extra_args, description)
    _UNPACK_FORMATS[name] = entry
1058
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
1062
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created.  A `path` without a
    directory component refers to the current directory, so nothing is
    done for it.
    """
    dirname = os.path.dirname(path)
    # os.makedirs('') raises, so skip bare file names.  exist_ok=True
    # avoids failing if the directory appears between a check and the
    # creation.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
1068
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Raises ReadError if `filename` is not a zip file.  Members whose name
    starts with '/' or contains '..' are skipped.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            name = info.filename

            # don't extract absolute paths or ones with .. in them
            # NOTE(review): this is a substring test, so a name that merely
            # contains ".." (e.g. "a..b") is skipped as well.
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: nothing to write
                continue

            data = archive.read(info.filename)
            with open(target, 'wb') as out:
                out.write(data)
            del data
1102
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`.

    Raises ReadError if tarfile cannot open `filename`.
    """
    import tarfile  # late import for breaking circular dependency

    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    # The archive is closed whether or not extraction succeeds.
    with archive:
        archive.extractall(extract_dir)
1116
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# Compressed tar variants are only offered when their codec is importable.
if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update({
        'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [],
                  "gzip'ed tar-file"),
    })

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update({
        'bztar': (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                  "bzip2'ed tar-file"),
    })

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update({
        'xztar': (['.tar.xz', '.txz'], _unpack_tarfile, [],
                  "xz'ed tar-file"),
    })
1133
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format with an
    extension that `filename` ends with, or None if there is none.
    """
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
1140
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided,
    unpack_archive will use the filename extension and see if an unpacker
    was registered for that extension.

    A ValueError is raised when an explicitly given `format` is not
    registered; a ReadError is raised when no registered unpacker matches
    the filename extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    unpacker = format_info[1]
    extra_kwargs = dict(format_info[2])
    unpacker(filename, extract_dir, **extra_kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +02001179
Éric Araujoe4d5b8e2011-08-08 16:51:11 +02001180
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', each computed from os.statvfs() block counts multiplied
        by the fragment size.
        """
        vfs = os.statvfs(path)
        frsize = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * frsize,
            used=(vfs.f_blocks - vfs.f_bfree) * frsize,
            free=vfs.f_bavail * frsize,
        )

elif _WINDOWS:

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free'; 'used' is derived as total minus free.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +02001215
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001216
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user may be a uid or a user name; group may be a gid or a group name.
    Names are converted to their respective uid/gid.  Whichever of the two
    is None is left unchanged.

    Raises ValueError if both are None, and LookupError if a name cannot
    be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system user name; anything else is passed as-is
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001247
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    The environment variables COLUMNS and LINES are consulted first; a
    value is used for its dimension only if it parses as a positive
    integer.

    For any dimension still unknown, the terminal connected to
    sys.__stdout__ is queried with os.get_terminal_size().  If that query
    fails (no stdout, not a terminal, or querying is unsupported), the
    `fallback` value is used instead.  It defaults to (80, 24), the
    default size of many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    def _env_dimension(variable):
        # 0 stands for "missing or not an integer".
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _env_dimension('COLUMNS')
    lines = _env_dimension('LINES')

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # stdout is None, closed, detached, or not a terminal, or
            # os.get_terminal_size() is unsupported
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001292
Cheryl Sabella5680f652019-02-13 06:25:10 -05001293
# Check that a given file can be accessed with the correct mode.
# Additionally check that `fn` is not a directory, as on Windows
# directories pass the os.access check.
def _access_check(fn, mode):
    """Return True if `fn` exists, passes os.access(fn, mode) and is not
    a directory; otherwise return False.
    """
    if not os.path.exists(fn):
        return False
    if not os.access(fn, mode):
        return False
    return not os.path.isdir(fn)
1300
1301
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Given a command, mode, and a PATH string, return the path which
    conforms to the given mode on the PATH, or None if there is no such
    file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    """
    # If we're given a path with a directory part, look it up directly rather
    # than referring to PATH directories. This includes checking relative to the
    # current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _access_check(cmd, mode) else None

    use_bytes = isinstance(cmd, bytes)

    if path is None:
        path = os.environ.get("PATH", None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    # Work in the same string type (bytes or str) as cmd.
    if use_bytes:
        search_dirs = os.fsencode(path).split(os.fsencode(os.pathsep))
    else:
        search_dirs = os.fsdecode(path).split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        curdir = os.fsencode(os.curdir) if use_bytes else os.curdir
        if curdir not in search_dirs:
            search_dirs.insert(0, curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        if use_bytes:
            pathext = [os.fsencode(ext) for ext in pathext]
        # See if the given file matches any of the expected path extensions.
        # This will allow us to short circuit when given "python.exe".
        # If it does match, only test that one, otherwise we have to try
        # others.
        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        # Each directory is probed at most once, compared case-normalized.
        key = os.path.normcase(directory)
        if key in visited:
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _access_check(full_name, mode):
                return full_name
    return None