blob: a4aa0dfdd10b097aad55757ba818bd5376a6fa42 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Giampaolo Rodola4a172cc2018-06-12 23:04:50 +020013import io
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020014
15try:
16 import zlib
17 del zlib
18 _ZLIB_SUPPORTED = True
19except ImportError:
20 _ZLIB_SUPPORTED = False
Tarek Ziadé396fad72010-02-23 05:30:31 +000021
22try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000023 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010024 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000025 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040026except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000027 _BZ2_SUPPORTED = False
28
29try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030030 import lzma
31 del lzma
32 _LZMA_SUPPORTED = True
33except ImportError:
34 _LZMA_SUPPORTED = False
35
36try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000037 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040038except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000039 getpwnam = None
40
41try:
42 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040043except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000044 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000045
# Platform detection.  ``posix`` and ``nt`` stay bound to None unless the
# matching low-level module is imported for the running OS.
_WINDOWS = os.name == 'nt'
posix = None
nt = None
if os.name == 'posix':
    import posix
elif _WINDOWS:
    import nt

# Default chunk size for read()/write() based copies:
# 1 MiB on Windows, 16 KiB everywhere else.
if _WINDOWS:
    COPY_BUFSIZE = 1024 * 1024
else:
    COPY_BUFSIZE = 16 * 1024

# Fast-copy availability flags.  They are falsy (None) when the ``posix``
# module was not imported above.
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")  # macOS
Giampaolo Rodola4a172cc2018-06-12 23:04:50 +020056
# Names exported by ``from shutil import *``.
# disk_usage is added later, if available on the platform.
__all__ = [
    "copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
    "copytree", "move", "rmtree",
    "Error", "SpecialFileError", "ExecError",
    "make_archive", "get_archive_formats",
    "register_archive_format", "unregister_archive_format",
    "get_unpack_formats", "register_unpack_format",
    "unregister_unpack_format", "unpack_archive",
    "ignore_patterns", "chown", "which", "get_terminal_size",
    "SameFileError",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000066
class Error(OSError):
    """Generic failure of a copy operation.

    copytree() raises it with a list of (src, dst, reason) tuples as its
    single argument.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000069
class SameFileError(Error):
    """Signals that the source and the destination are one and the same file."""
72
class SpecialFileError(OSError):
    """Signals an operation (e.g. copying) that is not supported on a
    special file (e.g. a named pipe)."""
76
class ExecError(OSError):
    """Signals that a command could not be executed."""
79
class ReadError(OSError):
    """Signals that an archive cannot be read."""
82
class RegistryError(Exception):
    """Signals that an operation on the archiving or unpacking
    registries failed."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000086
class _GiveupOnFastCopy(Exception):
    """Internal signal: a fast-copy function could not do the job and the
    caller should fall back on a raw read()/write() file copy.
    """
Tarek Ziadé6ac91722010-04-28 17:51:36 +000091
def _fastcopy_fcopyfile(fsrc, fdst, flags):
    """Copy a regular file's content or metadata with the high-performance
    fcopyfile(3) syscall (macOS).

    *fsrc* and *fdst* are open file objects; *flags* is handed unchanged
    to posix._fcopyfile().  _GiveupOnFastCopy is raised when the caller
    should fall back on a plain read()/write() copy; any other OSError is
    re-raised with ``filename`` / ``filename2`` filled in.
    """
    try:
        src_fd, dst_fd = fsrc.fileno(), fdst.fileno()
    except Exception as exc:
        # No usable descriptor: not a regular file.
        raise _GiveupOnFastCopy(exc)

    try:
        posix._fcopyfile(src_fd, dst_fd, flags)
    except OSError as exc:
        exc.filename = fsrc.name
        exc.filename2 = fdst.name
        if exc.errno not in {errno.EINVAL, errno.ENOTSUP}:
            raise exc from None
        raise _GiveupOnFastCopy(exc)
111
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 and Solaris only.

    *fsrc* and *fdst* are open file objects.  _GiveupOnFastCopy is raised
    when the caller should retry with a plain read()/write() copy: the
    objects expose no file descriptor, sendfile() only works with sockets
    on this platform, or the very first call failed before any data was
    written.  Any other OSError is re-raised with ``filename`` and
    ``filename2`` filled in.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _HAS_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MiB
    except Exception:
        blocksize = 2 ** 27  # 128MiB
    # On 32-bit builds a count derived from a > 2GiB file does not fit the
    # C type used by sendfile() and raises OverflowError; cap it at 1GiB.
    # The loop below still copies the whole file, just in several calls.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _HAS_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
171
def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
    """Variant of copyfileobj() built on readinto() and a memoryview.

    *fsrc* must provide a readinto() method and both files must be open
    in binary mode.  One buffer of *length* bytes is reused for the whole
    copy.
    """
    # Bind the bound methods once; they are called for every chunk.
    read_into = fsrc.readinto
    write = fdst.write
    with memoryview(bytearray(length)) as buf:
        while True:
            count = read_into(buf)
            if not count:
                break
            if count >= length:
                # The buffer is completely filled: write it as is.
                write(buf)
            else:
                # Short read: write only the filled prefix (no copy).
                with buf[:count] as chunk:
                    write(chunk)
190
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is moved in chunks obtained with ``fsrc.read(length)`` until a
    read returns an empty (falsy) result.
    """
    # Bind the bound methods once; they are called for every chunk.
    read, write = fsrc.read, fdst.write
    chunk = read(length)
    while chunk:
        write(chunk)
        chunk = read(length)
Greg Stein42bb8b32000-07-12 09:55:30 +0000201
def _samefile(src, dst):
    """Return whether *src* and *dst* designate the same file."""
    if not hasattr(os.path, 'samefile'):
        # No samefile() on this platform: compare the normalized
        # absolute pathnames instead.
        norm_src = os.path.normcase(os.path.abspath(src))
        norm_dst = os.path.normcase(os.path.abspath(dst))
        return norm_src == norm_dst

    # Macintosh, Unix.
    try:
        return os.path.samefile(src, dst)
    except OSError:
        # Typically one of the paths cannot be stat'ed.
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +0000213
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy data from src to dst in the most efficient way possible.

    If follow_symlinks is not set and src is a symbolic link, a new
    symlink will be created instead of copying the file it points to.

    Return dst.  SameFileError is raised when src and dst are the same
    file, SpecialFileError when either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    src_size = 0
    for index, path in enumerate([src, dst]):
        try:
            info = os.stat(path)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)
        # Only the source size matters, and only for the Windows path below.
        if _WINDOWS and index == 0:
            src_size = info.st_size

    if not follow_symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        if _HAS_FCOPYFILE:
            # macOS
            try:
                _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA)
            except _GiveupOnFastCopy:
                pass
            else:
                return dst
        elif _HAS_SENDFILE:
            # Linux / Solaris
            try:
                _fastcopy_sendfile(fsrc, fdst)
            except _GiveupOnFastCopy:
                pass
            else:
                return dst
        elif _WINDOWS and src_size > 0:
            # Windows, see:
            # https://github.com/python/cpython/pull/7160#discussion_r195405230
            _copyfileobj_readinto(fsrc, fdst, min(src_size, COPY_BUFSIZE))
            return dst

        # No fast path was available, or it gave up: plain read()/write().
        copyfileobj(fsrc, fdst)

    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000265
def copymode(src, dst, *, follow_symlinks=True):
    """Copy mode bits from src to dst.

    If follow_symlinks is not set, symlinks aren't followed if and only
    if both `src` and `dst` are symlinks.  If `lchmod` isn't available
    (e.g. Linux) this method does nothing.

    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        # Operate on the links themselves, which needs lchmod().
        if not hasattr(os, 'lchmod'):
            return
        read_stat = os.lstat
        apply_mode = os.lchmod
    elif not hasattr(os, 'chmod'):
        return
    else:
        read_stat = os.stat
        apply_mode = os.chmod

    mode_bits = stat.S_IMODE(read_stat(src).st_mode)
    apply_mode(dst, mode_bits)
286
if not hasattr(os, 'listxattr'):
    def _copyxattr(*args, **kwargs):
        """Do nothing: os.listxattr is not available here."""
        pass
else:
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Overwrite existing attributes.

        If `follow_symlinks` is false, symlinks won't be followed.

        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            # ENOTSUP / ENODATA mean there is nothing to copy; anything
            # else is a genuine failure.
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise
        for attr in attr_names:
            try:
                data = os.getxattr(src, attr, follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, data, follow_symlinks=follow_symlinks)
            except OSError as exc:
                # Best effort: skip attributes that may not be read or set.
                if exc.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
                    raise
313
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied too, through _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Symlinks are followed unless the caller asked otherwise *and* both
    # paths are symlinks.
    follow = (follow_symlinks
              or not os.path.islink(src)
              or not os.path.islink(dst))

    def lookup(name):
        # Use the real os function if it exists and, when the links
        # themselves are the target, only if it supports follow_symlinks.
        # In every other case the harmless _nop stands in.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    src_stat = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(src_stat.st_mode)
    lookup("utime")(dst, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod() was asked not to follow the symlink (follow is false)
        # and reported that it cannot do that.  We are out of options:
        # the mode of the symlink itself simply cannot be changed, so
        # give up and suppress the error.
        # (which is what shutil always did in this circumstance.)
        pass
    if hasattr(src_stat, 'st_flags'):
        try:
            lookup("chflags")(dst, src_stat.st_flags, follow_symlinks=follow)
        except OSError as why:
            # "Operation not supported" is tolerated under whichever of
            # these names the errno module defines.
            tolerated = [getattr(errno, code)
                         for code in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, code)]
            if why.errno not in tolerated:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200367
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is
    created inside it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000385
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is
    created inside it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000400
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.  The returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
412
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    subdirectories included. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    Return dst.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.
                    # exists() follows the link, so a relative target is
                    # resolved against the link's own directory rather
                    # than the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Pass every option down so that subdirectories are
                # copied under the same rules as the top directory.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000501
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200502# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory *path* and everything below it.

    Path-based implementation (vulnerable to race conditions).  Every
    OSError is handed to ``onerror(func, path, exc_info)`` instead of
    being raised here.
    """
    try:
        with os.scandir(path) as listing:
            children = list(listing)
    except OSError:
        onerror(os.scandir, path, sys.exc_info())
        children = []

    for child in children:
        child_path = child.path
        try:
            descend = child.is_dir(follow_symlinks=False)
        except OSError:
            descend = False

        if not descend:
            try:
                os.unlink(child_path)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            if child.is_symlink():
                # This can only happen if someone replaces
                # a directory with a symlink after the call to
                # os.scandir or entry.is_dir above.
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, child_path, sys.exc_info())
            continue
        _rmtree_unsafe(child_path, onerror)

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000536
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200537# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as descriptor *topfd*.

    *path* is only used to build the names reported to *onerror*.  The
    directory itself is not removed here; the caller does that.  Every
    OSError is handed to ``onerror(func, path, exc_info)``.
    """
    try:
        with os.scandir(topfd) as listing:
            children = list(listing)
    except OSError as err:
        err.filename = path
        onerror(os.scandir, path, sys.exc_info())
        return

    for child in children:
        child_path = os.path.join(path, child.name)
        try:
            descend = child.is_dir(follow_symlinks=False)
            if descend:
                # Remember what was seen now, to compare against the
                # descriptor opened below.
                seen_st = child.stat(follow_symlinks=False)
                descend = stat.S_ISDIR(seen_st.st_mode)
        except OSError:
            descend = False

        if not descend:
            try:
                os.unlink(child.name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            child_fd = os.open(child.name, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, child_path, sys.exc_info())
            continue

        try:
            if os.path.samestat(seen_st, os.fstat(child_fd)):
                _rmtree_safe_fd(child_fd, child_path, onerror)
                try:
                    os.rmdir(child.name, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, child_path, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # os.scandir or stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic "
                                  "link")
                except OSError:
                    onerror(os.path.islink, child_path, sys.exc_info())
        finally:
            os.close(child_fd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200584
# The fd-based rmtree needs dir_fd support in open/stat/unlink/rmdir,
# scandir() on a file descriptor, and stat() with follow_symlinks.
_use_fd_functions = (
    os.supports_dir_fd >= {os.open, os.stat, os.unlink, os.rmdir}
    and os.scandir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000589
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an ``except`` block, so a bare
            # raise re-raises the exception being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open).
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        try:
            if os.path.islink(path):
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, path, sys.exc_info())
            # can't continue even if onerror hook returns
            return
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000652
Christian Heimesada8c3b2008-03-18 18:26:33 +0000653def _basename(path):
654 # A basename() variant which first strips the trailing slash, if present.
655 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200656 sep = os.path.sep + (os.path.altsep or '')
657 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000658
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Raises Error if the computed destination inside an existing directory
    already exists, or if a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Honour the documented contract: always hand back the
            # destination, also on this early exit.
            return real_dst

        # _basename() rather than os.path.basename(): a trailing slash on
        # src must not produce an empty final component.
        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): fall back to
        # copy-then-delete, choosing the strategy by the kind of source.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000714
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000715def _destinsrc(src, dst):
Berker Peksag3715da52014-09-18 05:11:15 +0300716 src = os.path.abspath(src)
717 dst = os.path.abspath(dst)
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000718 if not src.endswith(os.path.sep):
719 src += os.path.sep
720 if not dst.endswith(os.path.sep):
721 dst += os.path.sep
722 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000723
724def _get_gid(name):
725 """Returns a gid, given a group name."""
726 if getgrnam is None or name is None:
727 return None
728 try:
729 result = getgrnam(name)
730 except KeyError:
731 result = None
732 if result is not None:
733 return result[2]
734 return None
735
736def _get_uid(name):
737 """Returns an uid, given a user name."""
738 if getpwnam is None or name is None:
739 return None
740 try:
741 result = getpwnam(name)
742 except KeyError:
743 result = None
744 if result is not None:
745 return result[2]
746 return None
747
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Build a tar archive, optionally compressed, from 'base_dir'.

    'compress' selects the compression: "gzip" (the default), "bzip2",
    "xz", or None for a plain tar file.  A ValueError is raised for any
    other value, or when the required compression module is missing.

    'owner' and 'group' name the user and group recorded for every member
    of the archive.  Names that cannot be resolved leave the corresponding
    ownership of the members untouched.

    The archive is written to 'base_name' + ".tar", plus the extension
    matching the compression (".gz", ".bz2", or ".xz").  Missing parent
    directories of that file are created.  When 'dry_run' is true nothing
    is written; only logging happens.  'verbose' is accepted but not used.

    Returns the output filename.
    """
    if compress is None:
        tar_compression = ''
    else:
        # (module available?, accepted 'compress' value, tarfile mode suffix)
        codecs = (
            (_ZLIB_SUPPORTED, 'gzip', 'gz'),
            (_BZ2_SUPPORTED, 'bzip2', 'bz2'),
            (_LZMA_SUPPORTED, 'xz', 'xz'),
        )
        for available, label, suffix in codecs:
            if available and compress == label:
                tar_compression = suffix
                break
        else:
            raise ValueError("bad value for 'compress', or compression format not "
                             "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    if compress:
        compress_ext = '.' + tar_compression
    else:
        compress_ext = ''
    archive_name = base_name + '.tar' + compress_ext
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _apply_ownership(member):
        # tarfile filter: override ownership only for the ids we resolved.
        if gid is not None:
            member.gid = gid
            member.gname = group
        if uid is not None:
            member.uid = uid
            member.uname = owner
        return member

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_apply_ownership)
    finally:
        tar.close()

    return archive_name
812
Tarek Ziadé396fad72010-02-23 05:30:31 +0000813def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
814 """Create a zip file from all the files under 'base_dir'.
815
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200816 The output zip file will be named 'base_name' + ".zip". Returns the
817 name of the output zip file.
Tarek Ziadé396fad72010-02-23 05:30:31 +0000818 """
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +0200819 import zipfile # late import for breaking circular dependency
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400820
Tarek Ziadé396fad72010-02-23 05:30:31 +0000821 zip_filename = base_name + ".zip"
822 archive_dir = os.path.dirname(base_name)
823
Serhiy Storchaka9a4fc192014-11-28 00:48:46 +0200824 if archive_dir and not os.path.exists(archive_dir):
Tarek Ziadé396fad72010-02-23 05:30:31 +0000825 if logger is not None:
826 logger.info("creating %s", archive_dir)
827 if not dry_run:
828 os.makedirs(archive_dir)
829
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400830 if logger is not None:
831 logger.info("creating '%s' and adding '%s' to it",
832 zip_filename, base_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000833
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400834 if not dry_run:
835 with zipfile.ZipFile(zip_filename, "w",
836 compression=zipfile.ZIP_DEFLATED) as zf:
Serhiy Storchakad941d7a2015-09-08 05:51:00 +0300837 path = os.path.normpath(base_dir)
Serhiy Storchaka666de772016-10-23 15:55:09 +0300838 if path != os.curdir:
839 zf.write(path, path)
840 if logger is not None:
841 logger.info("adding '%s'", path)
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400842 for dirpath, dirnames, filenames in os.walk(base_dir):
Serhiy Storchakad941d7a2015-09-08 05:51:00 +0300843 for name in sorted(dirnames):
844 path = os.path.normpath(os.path.join(dirpath, name))
845 zf.write(path, path)
846 if logger is not None:
847 logger.info("adding '%s'", path)
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400848 for name in filenames:
849 path = os.path.normpath(os.path.join(dirpath, name))
850 if os.path.isfile(path):
851 zf.write(path, path)
852 if logger is not None:
853 logger.info("adding '%s'", path)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000854
855 return zip_filename
856
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple; extra_args is a list of
# (keyword, value) pairs passed on to the function.
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

# The remaining formats are only offered when the compression module they
# rely on could be imported.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        'zip': (_make_zipfile, [], "ZIP file"),
    })

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    })

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update({
        'xztar': (_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    })
873
def get_archive_formats():
    """Returns a list of the formats available for creating archives.

    Each element of the returned list is a tuple (name, description);
    the list is sorted.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
883
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a list or tuple of
    (name, value) pairs that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    A TypeError is raised when function is not callable or when extra_args
    does not have the expected shape.  An existing registration under the
    same name is replaced.
    """
    extra_args = [] if extra_args is None else extra_args
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    malformed = any(
        not isinstance(element, (tuple, list)) or len(element) != 2
        for element in extra_args
    )
    if malformed:
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
904
def unregister_archive_format(name):
    """Removes the archive format `name` from the registry.

    A KeyError is raised if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
907
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format; in
    that case the current working directory is left untouched.
    """
    # Resolve the format and build the call arguments *before* changing the
    # working directory: a failure here must not leave the process stranded
    # in 'root_dir', since only the archiving call below is protected by the
    # try/finally that restores the directory.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    # Every format but 'zip' is handed the ownership arguments.
    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Must be made absolute while still in the original directory.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000961
962
def get_unpack_formats():
    """Returns a list of the formats available for unpacking.

    Each element of the returned list is a tuple
    (name, extensions, description); the list is sorted.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
973
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # Map every extension that is already claimed to the format owning it.
    claimed = {
        ext: name
        for name, info in _UNPACK_FORMATS.items()
        for ext in info[0]
    }

    for extension in extensions:
        if extension in claimed:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, claimed[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
990
991
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Registers an unpack format.

    `name` is the name of the format. `extensions` is a list of the file
    extensions that identify the format.

    `function` is the callable used to unpack archives of this format.
    It receives the archive to unpack and must raise a ReadError
    exception when it is unable to handle it.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as keyword arguments to the callable.  `description`
    can be provided to describe the format, and will be returned by the
    get_unpack_formats() function.

    The registration is validated by _check_unpack_options() first.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
1013
def unregister_unpack_format(name):
    """Removes the unpack format `name` from the registry.

    A KeyError is raised if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
1017
1018def _ensure_directory(path):
1019 """Ensure that the parent directory of `path` exists"""
1020 dirname = os.path.dirname(path)
1021 if not os.path.isdir(dirname):
1022 os.makedirs(dirname)
1023
1024def _unpack_zipfile(filename, extract_dir):
1025 """Unpack zip `filename` to `extract_dir`
1026 """
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +02001027 import zipfile # late import for breaking circular dependency
Tarek Ziadé6ac91722010-04-28 17:51:36 +00001028
1029 if not zipfile.is_zipfile(filename):
1030 raise ReadError("%s is not a zip file" % filename)
1031
1032 zip = zipfile.ZipFile(filename)
1033 try:
1034 for info in zip.infolist():
1035 name = info.filename
1036
1037 # don't extract absolute paths or ones with .. in them
1038 if name.startswith('/') or '..' in name:
1039 continue
1040
1041 target = os.path.join(extract_dir, *name.split('/'))
1042 if not target:
1043 continue
1044
1045 _ensure_directory(target)
1046 if not name.endswith('/'):
1047 # file
1048 data = zip.read(info.filename)
Éric Araujoc1b7e7f2011-09-18 23:12:30 +02001049 f = open(target, 'wb')
Tarek Ziadé6ac91722010-04-28 17:51:36 +00001050 try:
1051 f.write(data)
1052 finally:
1053 f.close()
1054 del data
1055 finally:
1056 zip.close()
1057
1058def _unpack_tarfile(filename, extract_dir):
Serhiy Storchaka11213772014-08-06 18:50:19 +03001059 """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
Tarek Ziadé6ac91722010-04-28 17:51:36 +00001060 """
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +02001061 import tarfile # late import for breaking circular dependency
Tarek Ziadé6ac91722010-04-28 17:51:36 +00001062 try:
1063 tarobj = tarfile.open(filename)
1064 except tarfile.TarError:
1065 raise ReadError(
1066 "%s is not a compressed or uncompressed tar file" % filename)
1067 try:
1068 tarobj.extractall(extract_dir)
1069 finally:
1070 tarobj.close()
1071
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# Compressed tar variants are only offered when the matching compression
# module could be imported.
if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update({
        'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [],
                  "gzip'ed tar-file"),
    })

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update({
        'bztar': (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                  "bzip2'ed tar-file"),
    })

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update({
        'xztar': (['.tar.xz', '.txz'], _unpack_tarfile, [],
                  "xz'ed tar-file"),
    })
1088
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
1095
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided,
    unpack_archive will use the filename extension and see if an unpacker
    was registered for that extension.

    A ValueError is raised when an explicit `format` is not registered;
    a ReadError is raised when no unpacker matches the filename extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    unpacker = format_info[1]
    options = dict(format_info[2])
    unpacker(filename, extract_dir, **options)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +02001134
Éric Araujoe4d5b8e2011-08-08 16:51:11 +02001135
# disk_usage() is only defined, and exported through __all__, on platforms
# offering a way to query a filesystem: os.statvfs() or the Windows helper.
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', each one an amount of space expressed in bytes.
        """
        st = os.statvfs(path)
        unit = st.f_frsize
        return _ntuple_diskusage(
            total=st.f_blocks * unit,
            used=(st.f_blocks - st.f_bfree) * unit,
            free=st.f_bavail * unit,
        )

elif _WINDOWS:

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', each one an amount of space expressed in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +02001170
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001171
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that
    case, they are converted to their respective uid/gid.  Whichever of
    the two is left as None is not changed.

    Raises ValueError if neither user nor group is given, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system username; anything else is used as the uid
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # anything that is not an int is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001202
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    Each dimension is first looked up in the environment: COLUMNS for
    the width and LINES for the height.  A variable that is defined and
    holds a positive integer is used as is.

    When COLUMNS or LINES is not usable, which is the common case, the
    terminal connected to sys.__stdout__ is queried by invoking
    os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not connected
    to a terminal, the value given in the fallback parameter is used.
    Fallback defaults to (80, 24) which is the default size used by many
    terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    def _from_environment(variable):
        # 0 stands for "not available": unset or not an integer.
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_environment('COLUMNS')
    lines = _from_environment('LINES')

    # only query if necessary
    if columns > 0 and lines > 0:
        return os.terminal_size((columns, lines))

    try:
        size = os.get_terminal_size(sys.__stdout__.fileno())
    except (AttributeError, ValueError, OSError):
        # stdout is None, closed, detached, or not a terminal, or
        # os.get_terminal_size() is unsupported
        size = os.terminal_size(fallback)

    width = columns if columns > 0 else size.columns
    height = lines if lines > 0 else size.lines
    return os.terminal_size((width, height))
Brian Curtinc57a3452012-06-22 16:00:30 -05001247
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate the command `cmd` and return the path to it, or None if no
    file satisfying `mode` can be found.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    A `cmd` containing a directory part is checked directly and the
    search path is not consulted.

    """
    def _usable(candidate, mode):
        # The file must exist and be accessible with the requested mode.
        # Directories are rejected explicitly because on Windows they pass
        # the os.access check.
        if not os.path.exists(candidate):
            return False
        return os.access(candidate, mode) and not os.path.isdir(candidate)

    # If we're given a path with a directory part, look it up directly rather
    # than referring to PATH directories. This includes checking relative to
    # the current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    directories = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in directories:
            directories.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # See if the given file matches any of the expected path extensions.
        # This will allow us to short circuit when given "python.exe".
        # If it does match, only test that one, otherwise we have to try
        # others.
        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in directories:
        key = os.path.normcase(directory)
        if key in visited:
            # each directory is only searched once
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None