blob: 6486cd6e5d28565d1be36578571eedba001fda7e [file] [log] [blame]
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020013
14try:
15 import zlib
16 del zlib
17 _ZLIB_SUPPORTED = True
18except ImportError:
19 _ZLIB_SUPPORTED = False
Tarek Ziadé396fad72010-02-23 05:30:31 +000020
21try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000022 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010023 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000024 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040025except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000026 _BZ2_SUPPORTED = False
27
28try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030029 import lzma
30 del lzma
31 _LZMA_SUPPORTED = True
32except ImportError:
33 _LZMA_SUPPORTED = False
34
35try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000036 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040037except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000038 getpwnam = None
39
40try:
41 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040042except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000043 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000044
# Platform probing, done once at import time: the flags below select which
# fast-copy strategy copyfile() is allowed to try.
_WINDOWS = (os.name == 'nt')

# Both names are always bound (to None when the module does not apply) so
# that they can be tested for truthiness on any platform.
posix = None
nt = None
if os.name == 'posix':
    import posix
elif _WINDOWS:
    import nt

# Default chunk size for read()/write() style copies.
if _WINDOWS:
    COPY_BUFSIZE = 1024 * 1024
else:
    COPY_BUFSIZE = 64 * 1024
_USE_CP_SENDFILE = hasattr(os, "sendfile") and sys.platform.startswith("linux")
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")  # macOS
Giampaolo Rodola4a172cc2018-06-12 23:04:50 +020055
# Public names of this module.
# disk_usage is added later, if available on the platform
__all__ = [
    "copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
    "copytree", "move", "rmtree",
    "Error", "SpecialFileError", "ExecError",
    "make_archive", "get_archive_formats",
    "register_archive_format", "unregister_archive_format",
    "get_unpack_formats", "register_unpack_format",
    "unregister_unpack_format", "unpack_archive",
    "ignore_patterns", "chown", "which", "get_terminal_size",
    "SameFileError",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000065
class Error(OSError):
    """Error raised by this module's copy helpers.

    copytree() raises it with a list of the failures it collected, and
    SameFileError derives from it.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000068
class SameFileError(Error):
    """Signals that the source and the destination are one and the same file."""
71
class SpecialFileError(OSError):
    """Signals an operation (such as copying) that was attempted on a
    special file (such as a named pipe) which does not support it."""
75
class ExecError(OSError):
    """Signals that a command could not be executed."""
78
class ReadError(OSError):
    """Signals that an archive could not be read."""
81
class RegistryError(Exception):
    """Signals a failed operation on the registries of archiving and
    unpacking formats."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000085
class _GiveupOnFastCopy(Exception):
    """Internal signal: a fast-copy function could not do the job, so the
    caller should fall back on a plain read()/write() file copy.
    """
Tarek Ziadé6ac91722010-04-28 17:51:36 +000090
def _fastcopy_fcopyfile(fsrc, fdst, flags):
    """Copy the content or metadata of a regular file with the
    high-performance fcopyfile(3) syscall (macOS).

    *fsrc* and *fdst* are open file objects and *flags* is handed to
    posix._fcopyfile() unchanged.  _GiveupOnFastCopy is raised when a file
    descriptor cannot be obtained or when the syscall fails with EINVAL or
    ENOTSUP; any other OSError is re-raised with both file names attached.
    """
    try:
        src_fd, dst_fd = fsrc.fileno(), fdst.fileno()
    except Exception as exc:
        raise _GiveupOnFastCopy(exc)  # not a regular file

    try:
        posix._fcopyfile(src_fd, dst_fd, flags)
    except OSError as exc:
        # Attach both names so that the error message is informative.
        exc.filename = fsrc.name
        exc.filename2 = fdst.name
        if exc.errno not in {errno.EINVAL, errno.ENOTSUP}:
            raise exc from None
        raise _GiveupOnFastCopy(exc)
110
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 only.

    *fsrc* and *fdst* are open file objects.  _GiveupOnFastCopy is raised
    when the caller should fall back on a plain read()/write() copy: no
    file descriptor is available, sendfile() does not support regular files
    here, or the very first call failed before any data was written.  Any
    other OSError is re-raised with both file names attached.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _USE_CP_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MiB
    except OSError:
        blocksize = 2 ** 27  # 128MiB
    # On 32-bit builds the count passed to sendfile() must fit in a C
    # ssize_t; cap the block at 1GiB so that files larger than 2GiB do not
    # fail with OverflowError.  The loop below copies whatever remains.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _USE_CP_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
170
def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
    """Variant of copyfileobj() built on readinto() and a memoryview.

    *fsrc* must provide a readinto() method, and both files must be open
    in binary mode.  One buffer of *length* bytes is reused for every
    chunk.
    """
    # Bind the methods once; they are called for every chunk.
    read_into = fsrc.readinto
    write = fdst.write
    with memoryview(bytearray(length)) as buf:
        while True:
            count = read_into(buf)
            if not count:
                return
            if count >= length:
                write(buf)
            else:
                # Short read: only write the part that was filled.
                with buf[:count] as filled:
                    write(filled)
189
def copyfileobj(fsrc, fdst, length=0):
    """Copy data from file-like object fsrc to file-like object fdst.

    The data is read and written in chunks of *length*; a false value
    (the default) selects COPY_BUFSIZE.  Copying stops at the first read
    that returns nothing.
    """
    chunk_size = length if length else COPY_BUFSIZE
    # Bind the methods once; they are called for every chunk.
    read, write = fsrc.read, fdst.write
    chunk = read(chunk_size)
    while chunk:
        write(chunk)
        chunk = read(chunk_size)
Greg Stein42bb8b32000-07-12 09:55:30 +0000202
def _samefile(src, dst):
    """Return whether *src* and *dst* designate the same file.

    *src* may be an os.DirEntry.  An OSError raised while comparing is
    reported as "not the same file".
    """
    # Macintosh, Unix.
    if hasattr(os.path, 'samestat') and isinstance(src, os.DirEntry):
        try:
            return os.path.samestat(src.stat(), os.stat(dst))
        except OSError:
            return False

    samefile = getattr(os.path, 'samefile', None)
    if samefile is not None:
        try:
            return samefile(src, dst)
        except OSError:
            return False

    # All other platforms: check for same pathname.
    def canonical(path):
        return os.path.normcase(os.path.abspath(path))

    return canonical(src) == canonical(dst)
Tim Peters495ad3c2001-01-15 01:36:40 +0000220
def _stat(fn):
    """Return the stat result for *fn*, a path or an os.DirEntry."""
    if isinstance(fn, os.DirEntry):
        return fn.stat()
    return os.stat(fn)
223
def _islink(fn):
    """Return whether *fn*, a path or an os.DirEntry, is a symbolic link."""
    if isinstance(fn, os.DirEntry):
        return fn.is_symlink()
    return os.path.islink(fn)
226
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy data from src to dst in the most efficient way possible.

    Return dst.  SameFileError is raised when src and dst are the same
    file, and SpecialFileError when either of them is a named pipe.

    If follow_symlinks is not set and src is a symbolic link, a new
    symlink will be created instead of copying the file it points to.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    file_size = 0
    for index, candidate in enumerate((src, dst)):
        try:
            info = _stat(candidate)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            if isinstance(candidate, os.DirEntry):
                candidate = candidate.path
            raise SpecialFileError("`%s` is a named pipe" % candidate)
        # Only the size of the source matters, and only on Windows.
        if _WINDOWS and index == 0:
            file_size = info.st_size

    if not follow_symlinks and _islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        # macOS
        if _HAS_FCOPYFILE:
            try:
                _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA)
            except _GiveupOnFastCopy:
                pass
            else:
                return dst
        # Linux
        elif _USE_CP_SENDFILE:
            try:
                _fastcopy_sendfile(fsrc, fdst)
            except _GiveupOnFastCopy:
                pass
            else:
                return dst
        # Windows, see:
        # https://github.com/python/cpython/pull/7160#discussion_r195405230
        elif _WINDOWS and file_size > 0:
            _copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE))
            return dst

        # No fast path applied (or it gave up): plain read()/write() copy.
        copyfileobj(fsrc, fdst)

    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000279
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    Symlinks are left unfollowed only when follow_symlinks is false and
    both `src` and `dst` are symlinks.  In that case, if `lchmod` isn't
    available (e.g. Linux), this function does nothing.

    """
    if not follow_symlinks and _islink(src) and os.path.islink(dst):
        if not hasattr(os, 'lchmod'):
            return
        read_stat, apply_mode = os.lstat, os.lchmod
    else:
        read_stat, apply_mode = _stat, os.chmod

    source_mode = read_stat(src).st_mode
    apply_mode(dst, stat.S_IMODE(source_mode))
298
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy the extended filesystem attributes of `src` onto `dst`.

        Attributes that already exist on `dst` are overwritten.

        Symlinks are not followed when `follow_symlinks` is false.

        """
        try:
            names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno in (errno.ENOTSUP, errno.ENODATA, errno.EINVAL):
                # Nothing can be listed for src: nothing to copy.
                return
            raise
        for name in names:
            try:
                value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
                os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
            except OSError as e:
                # Skip the attributes that cannot be read or written.
                ignorable = e.errno in (errno.EPERM, errno.ENOTSUP,
                                        errno.ENODATA, errno.EINVAL)
                if not ignorable:
                    raise
else:
    def _copyxattr(*args, **kwargs):
        """Do nothing: os.listxattr is not available on this platform."""
326
def copystat(src, dst, *, follow_symlinks=True):
    """Copy file metadata

    The permission bits, last access time, last modification time, and
    flags of `src` are applied to `dst`.  Extended attributes are copied
    as well where the platform provides them.  The file contents, owner,
    and group are unaffected.  `src` is a path or an os.DirEntry; `dst`
    is a path.

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.
    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Symlinks are followed unless the caller asked not to *and* both ends
    # really are symlinks.
    follow = follow_symlinks or not _islink(src) or not os.path.islink(dst)

    def lookup(name):
        # Use the real os function if it exists; when symlinks must not be
        # followed, additionally require that it supports follow_symlinks.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    if isinstance(src, os.DirEntry):
        st = src.stat(follow_symlinks=follow)
    else:
        st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    # We must copy extended attributes before the file is (potentially)
    # chmod()'ed read-only, otherwise setxattr() will error with -EACCES.
    _copyxattr(src, dst, follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # The platform cannot change the mode of a symlink without
        # following it.  There is nothing else to try, so give up quietly
        # (which is what shutil always did in this circumstance).
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Flags that the destination does not support are not an error.
            tolerated = [getattr(errno, err)
                         for err in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, err)]
            if why.errno not in tolerated:
                raise
Antoine Pitrou424246f2012-05-12 19:02:01 +0200389
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    When dst is a directory, the file is created inside it under the base
    name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    A SameFileError is raised if source and destination are the same
    file.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000407
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and metadata. Return the file's destination.

    The metadata is copied by copystat(); see that function for what is
    and is not preserved.

    When dst is a directory, the file is created inside it under the base
    name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000424
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns; the returned callable
    takes (path, names) and returns the set of names matching any of
    them, i.e. the files to exclude."""
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns
436
def _copytree(entries, src, dst, symlinks, ignore, copy_function,
              ignore_dangling_symlinks, dirs_exist_ok=False):
    """Copy the directory entries *entries* of *src* into *dst*.

    Helper for copytree().  *entries* is an iterable of os.DirEntry
    objects for the content of *src*; the other arguments have the same
    meaning as in copytree().  Return *dst*.

    A failure on one entry does not stop the copy: failures are collected
    as (srcname, dstname, message) tuples and raised together in a single
    Error once every entry has been attempted.
    """
    # The entries are needed twice (their names for ignore(), then the
    # copy loop) and a scandir iterator can only be consumed once.
    entries = list(entries)
    if ignore is not None:
        # The callback is documented to receive a path and a list of
        # names.  On recursive calls src may be an os.DirEntry, hence the
        # os.fspath() conversion.
        ignored_names = ignore(os.fspath(src),
                               [entry.name for entry in entries])
    else:
        ignored_names = set()

    os.makedirs(dst, exist_ok=dirs_exist_ok)
    errors = []
    # copy() and copy2() accept os.DirEntry objects, which spares them a
    # stat call; any other copy function is given a plain path.
    use_srcentry = copy_function is copy2 or copy_function is copy

    for srcentry in entries:
        if srcentry.name in ignored_names:
            continue
        srcname = os.path.join(src, srcentry.name)
        dstname = os.path.join(dst, srcentry.name)
        srcobj = srcentry if use_srcentry else srcname
        try:
            if srcentry.is_symlink():
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcobj, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  The link
                    # itself is tested (exists() follows it) because a
                    # relative target is relative to the directory holding
                    # the link, not to the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    if srcentry.is_dir():
                        copytree(srcobj, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks,
                                 dirs_exist_ok)
                    else:
                        copy_function(srcobj, dstname)
            elif srcentry.is_dir():
                copytree(srcobj, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks, dirs_exist_ok)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcobj, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
494
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False, dirs_exist_ok=False):
    """Recursively copy the directory tree at src to dst; return dst.

    The destination directory is created with os.makedirs(), and
    dirs_exist_ok is passed to it as exist_ok: when it is false (the
    default), a destination directory that already exists is an error.

    Errors met while copying are gathered and raised at the end in a
    single Error holding the list of reasons.

    symlinks selects how symbolic links in the source tree are treated:
    when true they are recreated as symbolic links in the destination
    tree, when false the contents of the files they point to are copied.
    In the latter case a link whose target doesn't exist adds an entry to
    the list of errors; set ignore_dangling_symlinks to true to silence
    that error.  Notice that this has no effect on platforms that don't
    support os.symlink.

    ignore, if given, is a callable invoked as

        callable(src, names) -> ignored_names

    where `src` is the directory being visited by copytree() and `names`
    holds the names of its contents.  Since copytree() is called
    recursively, the callable is invoked once for each directory that is
    copied.  It returns the names, relative to `src`, that should not be
    copied.

    copy_function is the callable used to copy each file; it is called
    with the source and the destination as arguments.  copy2() is the
    default, but any function with the same signature (like copy()) can
    be used.

    """
    with os.scandir(src) as scan_it:
        copied_to = _copytree(scan_it, src, dst, symlinks, ignore,
                              copy_function, ignore_dangling_symlinks,
                              dirs_exist_ok)
    return copied_to
Guido van Rossumd7673291998-02-06 21:38:09 +0000538
# Path-based version; vulnerable to race conditions.
def _rmtree_unsafe(path, onerror):
    """Delete the directory tree rooted at *path* using path-based calls.

    Every OSError is passed to *onerror* as (function, path, exc_info),
    from inside the except block, so that the handler may re-raise it.
    """
    try:
        with os.scandir(path) as scan_it:
            children = list(scan_it)
    except OSError:
        onerror(os.scandir, path, sys.exc_info())
        children = []

    for child in children:
        child_path = child.path
        try:
            is_dir = child.is_dir(follow_symlinks=False)
        except OSError:
            is_dir = False

        if not is_dir:
            try:
                os.unlink(child_path)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            if child.is_symlink():
                # This can only happen if someone replaces
                # a directory with a symlink after the call to
                # os.scandir or entry.is_dir above.
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, child_path, sys.exc_info())
        else:
            _rmtree_unsafe(child_path, onerror)

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000573
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Delete the content of the directory open as descriptor *topfd*.

    *path* is only used to build the names reported to *onerror*; the
    filesystem is reached through *topfd* and dir_fd-relative calls.  The
    directory itself is neither removed nor is *topfd* closed here.  Every
    OSError is passed to *onerror* as (function, path, exc_info), from
    inside the except block, so that the handler may re-raise it.
    """
    try:
        with os.scandir(topfd) as scan_it:
            children = list(scan_it)
    except OSError as err:
        err.filename = path
        onerror(os.scandir, path, sys.exc_info())
        return

    for child in children:
        child_path = os.path.join(path, child.name)
        try:
            is_dir = child.is_dir(follow_symlinks=False)
        except OSError:
            is_dir = False
        if is_dir:
            # Keep the stat result: it is compared with the descriptor
            # opened below to detect a swapped entry.
            try:
                orig_st = child.stat(follow_symlinks=False)
                is_dir = stat.S_ISDIR(orig_st.st_mode)
            except OSError:
                onerror(os.lstat, child_path, sys.exc_info())
                continue

        if not is_dir:
            try:
                os.unlink(child.name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            dirfd = os.open(child.name, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, child_path, sys.exc_info())
            continue
        try:
            if os.path.samestat(orig_st, os.fstat(dirfd)):
                _rmtree_safe_fd(dirfd, child_path, onerror)
                try:
                    os.rmdir(child.name, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, child_path, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # os.scandir or stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, child_path, sys.exc_info())
        finally:
            os.close(dirfd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200626
Hynek Schlawackd0f6e0a2012-06-29 08:28:20 +0200627_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
628 os.supports_dir_fd and
Serhiy Storchakad4d79bc2017-11-04 14:16:35 +0200629 os.scandir in os.supports_fd and
Hynek Schlawackd0f6e0a2012-06-29 08:28:20 +0200630 os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000631
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    Calling rmtree on a symbolic link is refused: the error is reported
    through onerror and nothing is removed.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so that onerror handlers can dispatch on it.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        try:
            if os.path.islink(path):
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, path, sys.exc_info())
            # can't continue even if onerror hook returns
            return
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000694
Christian Heimesada8c3b2008-03-18 18:26:33 +0000695def _basename(path):
696 # A basename() variant which first strips the trailing slash, if present.
697 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200698 sep = os.path.sep + (os.path.altsep or '')
699 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000700
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Raises Error if the computed destination already exists inside a
    destination directory, or if a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Honour the documented contract: always return the destination.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (typically a cross-filesystem move): fall back to
        # copy-then-delete, recreating symlinks rather than following them.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000756
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000757def _destinsrc(src, dst):
Berker Peksag3715da52014-09-18 05:11:15 +0300758 src = os.path.abspath(src)
759 dst = os.path.abspath(dst)
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000760 if not src.endswith(os.path.sep):
761 src += os.path.sep
762 if not dst.endswith(os.path.sep):
763 dst += os.path.sep
764 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000765
766def _get_gid(name):
767 """Returns a gid, given a group name."""
768 if getgrnam is None or name is None:
769 return None
770 try:
771 result = getgrnam(name)
772 except KeyError:
773 result = None
774 if result is not None:
775 return result[2]
776 return None
777
778def _get_uid(name):
779 """Returns an uid, given a user name."""
780 if getpwnam is None or name is None:
781 return None
782 try:
783 result = getpwnam(name)
784 except KeyError:
785 result = None
786 if result is not None:
787 return result[2]
788 return None
789
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Build a tar archive, optionally compressed, of everything in 'base_dir'.

    'compress' selects the compression: "gzip" (the default), "bzip2",
    "xz", or None for a plain tar file.  A ValueError is raised for any
    other value, or when the matching compression module is unavailable.

    'owner' and 'group' name the user and group recorded for every archive
    member; when they are omitted or cannot be resolved, the values read
    from the filesystem are kept.

    The archive is written to 'base_name' + ".tar" plus the compression
    suffix (".gz", ".bz2" or ".xz"), creating the parent directory when
    needed.  With 'dry_run' set nothing is written.  'verbose' is accepted
    but not used.

    Returns the output filename.
    """
    if compress is None:
        tar_compression = ''
    elif _ZLIB_SUPPORTED and compress == 'gzip':
        tar_compression = 'gz'
    elif _BZ2_SUPPORTED and compress == 'bzip2':
        tar_compression = 'bz2'
    elif _LZMA_SUPPORTED and compress == 'xz':
        tar_compression = 'xz'
    else:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    archive_name = base_name + '.tar'
    if tar_compression:
        archive_name = archive_name + '.' + tar_compression

    parent = os.path.dirname(archive_name)
    if parent and not os.path.exists(parent):
        if logger is not None:
            logger.info("creating %s", parent)
        if not dry_run:
            os.makedirs(parent)

    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _apply_ownership(member):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            member.gid = gid
            member.gname = group
        if uid is not None:
            member.uid = uid
            member.uname = owner
        return member

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_apply_ownership)
    finally:
        tar.close()
    return archive_name
854
Tarek Ziadé396fad72010-02-23 05:30:31 +0000855def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
856 """Create a zip file from all the files under 'base_dir'.
857
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200858 The output zip file will be named 'base_name' + ".zip". Returns the
859 name of the output zip file.
Tarek Ziadé396fad72010-02-23 05:30:31 +0000860 """
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200861 import zipfile # late import for breaking circular dependency
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400862
Tarek Ziadé396fad72010-02-23 05:30:31 +0000863 zip_filename = base_name + ".zip"
864 archive_dir = os.path.dirname(base_name)
865
Serhiy Storchaka9a4fc192014-11-28 00:48:46 +0200866 if archive_dir and not os.path.exists(archive_dir):
Tarek Ziadé396fad72010-02-23 05:30:31 +0000867 if logger is not None:
868 logger.info("creating %s", archive_dir)
869 if not dry_run:
870 os.makedirs(archive_dir)
871
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400872 if logger is not None:
873 logger.info("creating '%s' and adding '%s' to it",
874 zip_filename, base_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000875
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400876 if not dry_run:
877 with zipfile.ZipFile(zip_filename, "w",
878 compression=zipfile.ZIP_DEFLATED) as zf:
Serhiy Storchakad941d7a2015-09-08 05:51:00 +0300879 path = os.path.normpath(base_dir)
Serhiy Storchaka666de772016-10-23 15:55:09 +0300880 if path != os.curdir:
881 zf.write(path, path)
882 if logger is not None:
883 logger.info("adding '%s'", path)
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400884 for dirpath, dirnames, filenames in os.walk(base_dir):
Serhiy Storchakad941d7a2015-09-08 05:51:00 +0300885 for name in sorted(dirnames):
886 path = os.path.normpath(os.path.join(dirpath, name))
887 zf.write(path, path)
888 if logger is not None:
889 logger.info("adding '%s'", path)
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400890 for name in filenames:
891 path = os.path.normpath(os.path.join(dirpath, name))
892 if os.path.isfile(path):
893 zf.write(path, path)
894 if logger is not None:
895 logger.info("adding '%s'", path)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000896
897 return zip_filename
898
# Registry of archive creators: maps a format name to a tuple of
# (function, extra keyword arguments as (name, value) pairs, description).
_ARCHIVE_FORMATS = dict(
    tar=(_make_tarball, [('compress', None)], "uncompressed tar file"),
)

# Both gzip'ed tarballs and zip files are only registered when zlib exists.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        zip=(_make_zipfile, [], "ZIP file"),
    )

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    )
915
def get_archive_formats():
    """Return the archive formats registered for creating archives.

    The result is a sorted list of (name, description) tuples.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
925
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so a tuple passed as 'function' is formatted as
        # a single value instead of being unpacked by the % operator.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
946
def unregister_archive_format(name):
    """Remove the archive format called name from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
949
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format; in
    that case the current working directory is left untouched.
    """
    # Resolve the format before touching the working directory: failing
    # after the chdir below would leave the process stranded in root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute so it still points at the same place
        # once the working directory has changed.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    # The zip writer takes no ownership information; every other format
    # (including custom registered ones) receives owner and group.
    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +00001003
1004
def get_unpack_formats():
    """Return the formats registered for unpacking archives.

    The result is a sorted list of (name, extensions, description) tuples.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
1015
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of a would-be unpacker registration.

    Raises RegistryError if one of extensions already belongs to a
    registered format, and TypeError if function is not callable.
    extra_args is accepted but not inspected.
    """
    # Map every extension already claimed to the format that owns it.
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension in owners:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
1032
1033
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is the list of filename extensions handled by the format.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise a ReadError exception when it cannot
    handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples passed
    to the callable as keyword arguments.  `description` describes the
    format and is reported by get_unpack_formats().
    """
    arguments = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, arguments)
    _UNPACK_FORMATS[name] = (extensions, function, arguments, description)
1055
def unregister_unpack_format(name):
    """Remove the unpack format called name from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
1059
1060def _ensure_directory(path):
1061 """Ensure that the parent directory of `path` exists"""
1062 dirname = os.path.dirname(path)
1063 if not os.path.isdir(dirname):
1064 os.makedirs(dirname)
1065
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` into `extract_dir`.

    Members with an absolute name or with '..' anywhere in their name are
    skipped.  Raises ReadError if `filename` is not a zip file.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            name = info.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: creating its path was all that is needed
                continue

            payload = archive.read(info.filename)
            with open(target, 'wb') as output:
                output.write(payload)
1099
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` (plain, .gz, .bz2 or .xz) into
    `extract_dir`.

    Raises ReadError if `filename` cannot be opened as a tar archive.
    """
    import tarfile  # late import for breaking circular dependency

    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
1113
# Registry of unpackers: maps a format name to a tuple of
# (extensions, function, extra keyword arguments, description).
_UNPACK_FORMATS = dict(
    tar=(['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    zip=(['.zip'], _unpack_zipfile, [], "ZIP file"),
)

# Compressed tar variants are registered only when the matching
# compression module could be imported.
if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update(
        gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    )

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
               "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update(
        xztar=(['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file"),
    )
1130
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format with an
    extension that `filename` ends with, or None if there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
1137
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided, the
    format is chosen from the filename extension among the registered
    unpackers.

    A ValueError is raised when `format` is given but not registered; a
    ReadError is raised when no unpacker matches the filename extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    unpacker = format_info[1]
    options = dict(format_info[2])
    unpacker(filename, extract_dir, **options)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +02001176
Éric Araujoe4d5b8e2011-08-08 16:51:11 +02001177
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', each an amount of space in bytes.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        # 'free' is computed from f_bavail while 'used' is computed from
        # f_bfree, so total is not necessarily used + free.
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit,
        )

elif _WINDOWS:

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', each an amount of space in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +02001212
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001213
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    user may be a uid or a user name; group may be a gid or a group name.
    Names are converted to the matching numeric id.  An argument left as
    None keeps that part of the ownership unchanged.

    Raises ValueError when neither user nor group is given, and
    LookupError when a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # user can either be an int (the uid) or a string (the user name)
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001244
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    Each dimension is first looked up in the environment (COLUMNS for the
    width, LINES for the height); a value that is a positive integer is
    used as is.

    Any dimension still unknown is obtained by querying the terminal
    connected to sys.__stdout__ with os.get_terminal_size().

    If that query fails, because the system does not support it or
    because we are not connected to a terminal, the corresponding value of
    the fallback parameter is used.  Fallback defaults to (80, 24), the
    default size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    def _from_environment(variable):
        # A missing or non-integer value is reported as 0 ("unknown").
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_environment('COLUMNS')
    lines = _from_environment('LINES')

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            queried = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # stdout is None, closed, detached, or not a terminal, or
            # os.get_terminal_size() is unsupported
            queried = os.terminal_size(fallback)
        if columns <= 0:
            columns = queried.columns
        if lines <= 0:
            lines = queried.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001289
Cheryl Sabella5680f652019-02-13 06:25:10 -05001290
1291# Check that a given file can be accessed with the correct mode.
1292# Additionally check that `file` is not a directory, as on Windows
1293# directories pass the os.access check.
1294def _access_check(fn, mode):
1295 return (os.path.exists(fn) and os.access(fn, mode)
1296 and not os.path.isdir(fn))
1297
1298
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate a command: given a command, mode, and a PATH string, return
    the path which conforms to the given mode on the PATH, or None if there
    is no such file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    """
    # If we're given a path with a directory part, look it up directly rather
    # than referring to PATH directories. This includes checking relative to
    # the current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _access_check(cmd, mode) else None

    use_bytes = isinstance(cmd, bytes)

    if path is None:
        path = os.environ.get("PATH", None)
        if path is None:
            try:
                path = os.confstr("CS_PATH")
            except (AttributeError, ValueError):
                # os.confstr() or CS_PATH is not available
                path = os.defpath
        # bpo-35755: Don't use os.defpath if the PATH environment variable is
        # set to an empty string

    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
    if not path:
        return None

    # Work in the same string type (bytes or str) as cmd.
    if use_bytes:
        search_dirs = os.fsencode(path).split(os.fsencode(os.pathsep))
    else:
        search_dirs = os.fsdecode(path).split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        curdir = os.fsencode(os.curdir) if use_bytes else os.curdir
        if curdir not in search_dirs:
            search_dirs.insert(0, curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        if use_bytes:
            pathext = [os.fsencode(ext) for ext in pathext]
        # See if the given file matches any of the expected path extensions.
        # This will allow us to short circuit when given "python.exe".
        # If it does match, only test that one, otherwise we have to try
        # others.
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # Each distinct directory is searched only once.
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _access_check(full_name, mode):
                return full_name
    return None