blob: 74348ba62ef734df9b6cf4ee4cb8e96883651554 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020013
# Probe for the optional compression modules.  Only their availability is
# recorded; the module name itself is dropped again after a successful import.
try:
    import zlib
except ImportError:
    _ZLIB_SUPPORTED = False
else:
    del zlib
    _ZLIB_SUPPORTED = True

try:
    import bz2
except ImportError:
    _BZ2_SUPPORTED = False
else:
    del bz2
    _BZ2_SUPPORTED = True

try:
    import lzma
except ImportError:
    _LZMA_SUPPORTED = False
else:
    del lzma
    _LZMA_SUPPORTED = True

# User/group database lookups; each name is None when its module is missing.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None

_WINDOWS = os.name == 'nt'

# Bind the low-level platform module matching os.name; the other stays None.
posix = nt = None
if os.name == 'posix':
    import posix
elif _WINDOWS:
    import nt

# Default chunk size for buffered copies: 1 MiB on Windows, 16 KiB elsewhere.
if _WINDOWS:
    COPY_BUFSIZE = 1024 * 1024
else:
    COPY_BUFSIZE = 16 * 1024

# Both flags are falsy (None) whenever the posix module was not imported.
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile")  # macOS
Giampaolo Rodola4a172cc2018-06-12 23:04:50 +020055
# Names exported by a star-import of this module.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which", "get_terminal_size",
           "SameFileError"]
           # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000065
class Error(OSError):
    """OSError subclass used for failures reported by this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000068
class SameFileError(Error):
    """Raised when the source and the destination are one and the same file."""
71
class SpecialFileError(OSError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
75
class ExecError(OSError):
    """Raised when a command cannot be executed."""
78
class ReadError(OSError):
    """Raised when an archive cannot be read."""
81
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000085
class _GiveupOnFastCopy(Exception):
    """Internal signal: a fast-copy function could not do the job and the
    caller should fall back to a plain read()/write() copy.
    """
Tarek Ziadé6ac91722010-04-28 17:51:36 +000090
def _fastcopy_fcopyfile(fsrc, fdst, flags):
    """Copy the content or metadata of a regular file through the
    fcopyfile(3) syscall (macOS).

    *fsrc* and *fdst* are open file objects and *flags* is handed to
    posix._fcopyfile() unchanged.  _GiveupOnFastCopy is raised when a file
    descriptor cannot be obtained or the call fails with EINVAL/ENOTSUP;
    any other OSError is re-raised with both file names attached.
    """
    try:
        infd, outfd = fsrc.fileno(), fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    try:
        posix._fcopyfile(infd, outfd, flags)
    except OSError as err:
        # Attach the file names so the error is informative for the caller.
        err.filename = fsrc.name
        err.filename2 = fdst.name
        if err.errno not in {errno.EINVAL, errno.ENOTSUP}:
            raise err from None
        raise _GiveupOnFastCopy(err)
110
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 and Solaris only.

    *fsrc* and *fdst* are open file objects.  _GiveupOnFastCopy is raised
    when the caller should fall back to a plain read()/write() copy: no
    file descriptor is available, sendfile() only supports sockets, or the
    very first call failed before any data was written.  Other OSErrors
    propagate with both file names attached.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _HAS_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MiB
    except Exception:
        blocksize = 2 ** 27  # 128MiB
    # On 32-bit builds the byte count must fit a C ssize_t; cap it at 1GiB
    # so that files larger than 2GiB do not trigger an OverflowError.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).  Remember that so it is not tried again.
                _HAS_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
170
def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
    """Variant of copyfileobj() built on readinto() and a reusable buffer.

    *fsrc* must provide a readinto() method and both files must be open
    in binary mode.  *length* is the size of the single buffer that is
    reused for every chunk.
    """
    # Bind the bound methods once to keep the loop overhead low.
    readinto = fsrc.readinto
    write = fdst.write
    with memoryview(bytearray(length)) as buf:
        while True:
            count = readinto(buf)
            if not count:
                break
            if count < length:
                # Short read: write only the filled part of the buffer.
                with buf[:count] as filled:
                    write(filled)
            else:
                write(buf)
189
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is moved in chunks of at most *length* until fsrc.read()
    returns an empty result.
    """
    # Bind the bound methods once to keep the loop overhead low.
    read, write = fsrc.read, fdst.write
    chunk = read(length)
    while chunk:
        write(chunk)
        chunk = read(length)
Greg Stein42bb8b32000-07-12 09:55:30 +0000200
def _samefile(src, dst):
    """Return True if *src* and *dst* refer to the same file.

    *src* may be an os.DirEntry.  An OSError raised while comparing
    (e.g. because a file does not exist) counts as "not the same".
    """
    try:
        # Macintosh, Unix.
        if isinstance(src, os.DirEntry) and hasattr(os.path, 'samestat'):
            return os.path.samestat(src.stat(), os.stat(dst))
        if hasattr(os.path, 'samefile'):
            return os.path.samefile(src, dst)
    except OSError:
        return False

    # All other platforms: check for same pathname.
    normalized_src = os.path.normcase(os.path.abspath(src))
    normalized_dst = os.path.normcase(os.path.abspath(dst))
    return normalized_src == normalized_dst
Tim Peters495ad3c2001-01-15 01:36:40 +0000218
def _stat(fn):
    """Return the stat result for *fn*, which is a path or an os.DirEntry."""
    if isinstance(fn, os.DirEntry):
        return fn.stat()
    return os.stat(fn)
221
def _islink(fn):
    """Tell whether *fn* (a path or an os.DirEntry) is a symbolic link."""
    if isinstance(fn, os.DirEntry):
        return fn.is_symlink()
    return os.path.islink(fn)
224
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy data from src to dst in the most efficient way possible.

    If follow_symlinks is not set and src is a symbolic link, a new
    symlink will be created instead of copying the file it points to.

    SameFileError is raised when src and dst are the same file and
    SpecialFileError when either one is a named pipe.  Returns dst.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    src_size = 0
    for position, candidate in enumerate((src, dst)):
        try:
            info = _stat(candidate)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            if isinstance(candidate, os.DirEntry):
                candidate = candidate.path
            raise SpecialFileError("`%s` is a named pipe" % candidate)
        # Only the size of the source (position 0) is of interest, and
        # only for the Windows code path further down.
        if _WINDOWS and position == 0:
            src_size = info.st_size

    if not follow_symlinks and _islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        if _HAS_FCOPYFILE:
            # macOS
            try:
                _fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA)
            except _GiveupOnFastCopy:
                pass
            else:
                return dst
        elif _HAS_SENDFILE:
            # Linux / Solaris
            try:
                _fastcopy_sendfile(fsrc, fdst)
            except _GiveupOnFastCopy:
                pass
            else:
                return dst
        elif _WINDOWS and src_size > 0:
            # Windows, see:
            # https://github.com/python/cpython/pull/7160#discussion_r195405230
            _copyfileobj_readinto(fsrc, fdst, min(src_size, COPY_BUFSIZE))
            return dst

        # Generic fallback, also reached when a fast path gave up.
        copyfileobj(fsrc, fdst)

    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000277
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits from src to dst.

    If follow_symlinks is not set, symlinks aren't followed if and only
    if both `src` and `dst` are symlinks.  If `lchmod` isn't available
    (e.g. Linux) this method does nothing in that case.

    """
    on_links = not follow_symlinks and _islink(src) and os.path.islink(dst)
    if on_links:
        if not hasattr(os, 'lchmod'):
            return
        stat_func, chmod_func = os.lstat, os.lchmod
    else:
        if not hasattr(os, 'chmod'):
            return
        stat_func, chmod_func = _stat, os.chmod

    source_mode = stat_func(src).st_mode
    chmod_func(dst, stat.S_IMODE(source_mode))
298
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Attributes that already exist on `dst` are overwritten.

        If `follow_symlinks` is false, symlinks won't be followed.

        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            # Nothing to copy when listing reports ENOTSUP or ENODATA.
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise

        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                # Skip attributes failing with one of these error codes.
                if exc.errno not in (errno.EPERM, errno.ENOTSUP,
                                     errno.ENODATA):
                    raise
else:
    def _copyxattr(*args, **kwargs):
        """No-op used when os.listxattr is not available."""
        pass
325
def copystat(src, dst, *, follow_symlinks=True):
    """Copy file metadata

    The permission bits, last access time, last modification time and
    flags of `src` are applied to `dst`; extended attributes are copied
    too where the platform supports them.  File contents, owner and group
    are left alone.  `src` may be a path or an os.DirEntry, `dst` is a path.

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.
    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Operate on the link targets unless the caller asked not to *and*
    # both ends really are symlinks.
    follow = follow_symlinks or not (_islink(src) and os.path.islink(dst))

    def lookup(name):
        # Pick os.<name> if it exists; when symlinks must not be followed
        # the function additionally has to support follow_symlinks.
        # Anything else degrades to a no-op.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    if isinstance(src, os.DirEntry):
        st = src.stat(follow_symlinks=follow)
    else:
        st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # The chmod call could not be carried out for this combination of
        # arguments on this platform, so the mode simply cannot be applied
        # here.  Give up and suppress the error.
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # "Not supported" errors are tolerated; errno names that this
            # platform does not define are skipped.
            tolerated = [getattr(errno, name)
                         for name in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, name)]
            if why.errno not in tolerated:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200386
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    When dst is a directory the file is created inside it under the base
    name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000404
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and metadata. Return the file's destination.

    Metadata is copied with copystat(). Please see the copystat function
    for more information.

    When dst is a directory the file is created inside it under the base
    name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000421
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        # Collect every name matched by at least one of the patterns.
        matched = set()
        for glob in patterns:
            matched.update(fnmatch.filter(names, glob))
        return matched
    return _ignore_patterns
433
def _copytree(entries, src, dst, symlinks, ignore, copy_function,
              ignore_dangling_symlinks):
    """Copy the directory entries *entries* of *src* into a new *dst*.

    Worker behind copytree(); the arguments have the same meaning as
    there, and *entries* is an iterable of os.DirEntry objects for the
    contents of *src*.  Failures on individual entries are collected so
    that the remaining entries are still processed; if any were collected
    a single Error carrying the list of (src, dst, reason) tuples is
    raised at the end.  Returns *dst*.
    """
    if ignore is not None:
        ignored_names = ignore(src, set(os.listdir(src)))
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    # The built-in copy functions accept DirEntry objects; any other
    # copy function is only ever given plain path names.
    use_srcentry = copy_function is copy2 or copy_function is copy

    for srcentry in entries:
        if srcentry.name in ignored_names:
            continue
        srcname = os.path.join(src, srcentry.name)
        dstname = os.path.join(dst, srcentry.name)
        srcobj = srcentry if use_srcentry else srcname
        try:
            if srcentry.is_symlink():
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcobj, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore dangling symlink if the flag is on.  The link
                    # itself is tested (exists() follows it): a relative
                    # target must be resolved against the link's directory,
                    # not against the current working directory.
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    # Otherwise let the copy occur; copying a dangling
                    # link raises and the error is recorded below.
                    if srcentry.is_dir():
                        copytree(srcobj, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcobj, dstname)
            elif srcentry.is_dir():
                copytree(srcobj, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcobj, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
490
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which holds the names of
    the `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    collection of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    Returns the destination directory.

    """
    # Snapshot the directory content before anything is created: the
    # destination may live inside `src`, and a lazily consumed scandir()
    # iterator could then pick up the freshly made directory and recurse
    # into it.  This also releases the directory handle before the
    # (possibly deep) recursive copy starts.
    with os.scandir(src) as scandir_it:
        entries = list(scandir_it)
    return _copytree(entries=entries, src=src, dst=dst, symlinks=symlinks,
                     ignore=ignore, copy_function=copy_function,
                     ignore_dangling_symlinks=ignore_dangling_symlinks)
Guido van Rossumd7673291998-02-06 21:38:09 +0000531
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200532# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree rooted at *path* using path-based calls.

    Every failure is reported as onerror(func, path, exc_info) from inside
    the handling ``except`` clause, so a handler may re-raise the active
    exception.  If the handler returns, removal carries on with the
    remaining entries.
    """
    try:
        with os.scandir(path) as scandir_it:
            children = list(scandir_it)
    except OSError:
        onerror(os.scandir, path, sys.exc_info())
        children = []

    for child in children:
        child_path = child.path
        try:
            is_dir = child.is_dir(follow_symlinks=False)
        except OSError:
            is_dir = False

        if not is_dir:
            try:
                os.unlink(child_path)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            if child.is_symlink():
                # This can only happen if someone replaces
                # a directory with a symlink after the call to
                # os.scandir or entry.is_dir above.
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, child_path, sys.exc_info())
            continue
        _rmtree_unsafe(child_path, onerror)

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000566
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200567# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Empty the directory open as descriptor *topfd* using fd-based calls.

    *path* is only used to build the names reported to *onerror*; every
    operation is performed relative to *topfd*.  The directory behind
    *topfd* itself is not removed here.  Failures are reported as
    onerror(func, path, exc_info) from inside the handling ``except``
    clause.
    """
    try:
        with os.scandir(topfd) as scandir_it:
            children = list(scandir_it)
    except OSError as err:
        err.filename = path
        onerror(os.scandir, path, sys.exc_info())
        return

    for child in children:
        child_path = os.path.join(path, child.name)
        try:
            is_dir = child.is_dir(follow_symlinks=False)
            if is_dir:
                # Keep the stat result to compare against the descriptor
                # opened below.
                orig_st = child.stat(follow_symlinks=False)
                is_dir = stat.S_ISDIR(orig_st.st_mode)
        except OSError:
            is_dir = False

        if not is_dir:
            try:
                os.unlink(child.name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, child_path, sys.exc_info())
            continue

        try:
            dirfd = os.open(child.name, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, child_path, sys.exc_info())
            continue

        try:
            if os.path.samestat(orig_st, os.fstat(dirfd)):
                _rmtree_safe_fd(dirfd, child_path, onerror)
                try:
                    os.rmdir(child.name, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, child_path, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # os.scandir or stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic "
                                  "link")
                except OSError:
                    onerror(os.path.islink, child_path, sys.exc_info())
        finally:
            os.close(dirfd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200614
# True when the platform offers everything the fd-based removal needs:
# dir_fd support in open/stat/unlink/rmdir, scandir() on a descriptor and
# a stat() that accepts follow_symlinks.
_use_fd_functions = (
    os.supports_dir_fd >= {os.open, os.stat, os.unlink, os.rmdir}
    and os.scandir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000619
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an ``except`` clause, so a bare
            # raise re-raises the error being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the call that actually failed.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        try:
            if os.path.islink(path):
                # symlinks to directories are forbidden, see bug #1669
                raise OSError("Cannot call rmtree on a symbolic link")
        except OSError:
            onerror(os.path.islink, path, sys.exc_info())
            # can't continue even if onerror hook returns
            return
        return _rmtree_unsafe(path, onerror)
678
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000679# Allow introspection of whether or not the hardening against symlink
680# attacks is supported on the current platform
681rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000682
def _basename(path):
    """Return the last component of *path*, ignoring trailing separators.

    A basename() variant which first strips the trailing slash, if present.
    Thus we always get the last component of the path, even for directories.
    Accepts str, bytes and os.PathLike paths; the result has the type
    returned by os.fspath(path).
    """
    # Path-like objects have no rstrip(); reduce them to str/bytes first.
    path = os.fspath(path)
    sep = os.path.sep + (os.path.altsep or '')
    if isinstance(path, bytes):
        # bytes.rstrip() needs a bytes argument.
        sep = os.fsencode(sep)
    return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000688
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location, much like
    the Unix "mv" command, and return the destination path.

    When dst is an existing directory (or a symlink to one), src is moved
    inside it under its own base name; an Error is raised if that target
    already exists.  When src and dst are the same file, only os.rename()
    is performed and nothing is returned.

    If the destination exists but is not a directory, whether it gets
    overwritten depends on os.rename() semantics.

    os.rename() is attempted first.  If it fails with OSError (e.g. for a
    cross-filesystem move), the source is re-created at the destination
    and then removed: symlinks are recreated under the new name,
    directories are copied with copytree() and other files are copied
    with `copy_function`.

    `copy_function` is the callable used to copy files (it is also handed
    to copytree() for directories).  It defaults to copy2(); any function
    with the same signature (like copy()) can be used.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # rename() was not possible: copy the source, then delete it.
        if os.path.islink(src):
            os.symlink(os.readlink(src), target)
            os.unlink(src)
        elif not os.path.isdir(src):
            copy_function(src, target)
            os.unlink(src)
        else:
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, target, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
    return target
Brett Cannon1c3fa182004-06-19 21:11:35 +0000744
def _destinsrc(src, dst):
    """Return True if the absolute path of dst lies at or below that of src."""
    def _as_prefix(path):
        # Absolute form ending in a separator, so that "/a" is not taken
        # for a parent of "/ab".
        path = os.path.abspath(path)
        return path if path.endswith(os.path.sep) else path + os.path.sep

    src_prefix = _as_prefix(src)
    dst_prefix = _as_prefix(dst)
    return dst_prefix.startswith(src_prefix)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000753
def _get_gid(name):
    """Return the gid for group *name*, or None when it cannot be resolved.

    None is returned when no name is given, when getgrnam is unavailable,
    or when the lookup raises KeyError.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    # Index 2 of the group entry holds the gid.
    return None if entry is None else entry[2]
765
def _get_uid(name):
    """Return the uid for user *name*, or None when it cannot be resolved.

    None is returned when no name is given, when getpwnam is unavailable,
    or when the lookup raises KeyError.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    # Index 2 of the password entry holds the uid.
    return None if entry is None else entry[2]
777
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Build a (possibly compressed) tar archive of everything under
    'base_dir' and return the name of the archive file.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None;
    a ValueError is raised for any other value, or when the matching
    compression module is not available.

    'owner' and 'group' name the user and group recorded for each archive
    member.  Names that cannot be resolved to a uid/gid are ignored and
    the member keeps the ownership tarfile recorded for it.

    The archive is named 'base_name' + ".tar", plus the compression
    extension (".gz", ".bz2", or ".xz") when compression is used.  A
    missing parent directory of the archive is created.  With 'dry_run'
    set, nothing is written; only the name is computed.

    'verbose' is accepted but not used by this function.
    """
    if compress is None:
        tar_compression = ''
    else:
        # (module available?, accepted 'compress' value, tarfile suffix)
        known = (
            (_ZLIB_SUPPORTED, 'gzip', 'gz'),
            (_BZ2_SUPPORTED, 'bzip2', 'bz2'),
            (_LZMA_SUPPORTED, 'xz', 'xz'),
        )
        for available, label, suffix in known:
            if available and compress == label:
                tar_compression = suffix
                break
        else:
            raise ValueError("bad value for 'compress', or compression format not "
                             "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    if compress:
        compress_ext = '.' + tar_compression
    else:
        compress_ext = ''
    archive_name = base_name + '.tar' + compress_ext
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _apply_ownership(member):
        # tarfile filter: override ownership only for ids that resolved.
        if gid is not None:
            member.gid = gid
            member.gname = group
        if uid is not None:
            member.uid = uid
            member.uname = owner
        return member

    if not dry_run:
        archive = tarfile.open(archive_name, 'w|%s' % tar_compression)
        try:
            archive.add(base_dir, filter=_apply_ownership)
        finally:
            archive.close()

    return archive_name
842
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Zip up everything under 'base_dir' and return the archive's name.

    The archive is written to 'base_name' + ".zip"; a missing parent
    directory is created first.  Directories are stored as entries of
    their own and only regular files are added.  With 'dry_run' set,
    nothing is written.  'verbose' is accepted but not used here.
    """
    import zipfile  # late import for breaking circular dependency

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if dry_run:
        return zip_filename

    with zipfile.ZipFile(zip_filename, "w",
                         compression=zipfile.ZIP_DEFLATED) as zf:

        def _store(entry):
            # Add one path under its own (normalized) name and log it.
            zf.write(entry, entry)
            if logger is not None:
                logger.info("adding '%s'", entry)

        top = os.path.normpath(base_dir)
        if top != os.curdir:
            _store(top)
        for dirpath, dirnames, filenames in os.walk(base_dir):
            for name in sorted(dirnames):
                _store(os.path.normpath(os.path.join(dirpath, name)))
            for name in filenames:
                entry = os.path.normpath(os.path.join(dirpath, name))
                if os.path.isfile(entry):
                    _store(entry)

    return zip_filename
886
# Registry of archive creators: format name -> (function, extra_args,
# description).  Formats that need an optional compression module are only
# registered when that module could be imported.
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        zip=(_make_zipfile, [], "ZIP file"),
    )

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    )
903
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
913
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so that a tuple passed as 'function' is shown
        # as-is instead of being unpacked as %-format arguments.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
934
def unregister_archive_format(name):
    """Remove the archive format *name* from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
937
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Validate the format before touching the working directory, so that
    # an unknown format cannot leave the process stranded in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    # The current directory is only queried when a change was requested.
    save_cwd = None
    if root_dir is not None:
        save_cwd = os.getcwd()
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name now: a relative name would otherwise be
        # interpreted relative to root_dir after the chdir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if save_cwd is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000991
992
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
1003
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` is already claimed by a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # Map every extension already in use to the format that owns it.
    claimed = {ext: fmt
               for fmt, info in _UNPACK_FORMATS.items()
               for ext in info[0]}

    for extension in extensions:
        if extension in claimed:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, claimed[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
1020
1021
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format and `extensions` the list of file
    extensions that belong to it.

    `function` is the callable used to unpack archives.  It receives the
    archive to unpack and must raise a ReadError exception when it cannot
    handle it.

    `extra_args`, if given, is a sequence of (name, value) tuples passed
    to the callable as arguments.  `description` describes the format and
    is returned by the get_unpack_formats() function.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
1043
def unregister_unpack_format(name):
    """Remove the unpack format *name* from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
1047
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created.  A `path` without a
    directory component refers to the current directory, so there is
    nothing to create.
    """
    dirname = os.path.dirname(path)
    # os.makedirs('') raises, so skip bare file names.  exist_ok avoids the
    # race between checking for the directory and creating it.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
1053
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Raises ReadError if `filename` is not a zip file.  Members whose name
    is absolute or contains ".." are skipped.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            name = member.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: creating it above was all there is to do
                continue

            # file
            data = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(data)
            del data
1087
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`.

    Raises ReadError if tarfile cannot open `filename`.
    """
    import tarfile  # late import for breaking circular dependency

    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        message = "%s is not a compressed or uncompressed tar file" % filename
        raise ReadError(message)

    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
1101
# Registry of unpackers: format name -> (extensions, function, extra_args,
# description).  Compressed tar formats are only registered when the
# matching compression module could be imported.
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update(
        gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    )

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
               "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update(
        xztar=(['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file"),
    )
1118
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
1125
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the directory the archive is unpacked into.  It
    defaults to the current working directory.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  When it is omitted, the
    unpacker is chosen from the registered extensions that match
    `filename`.

    A ValueError is raised for an explicit `format` that is not
    registered; a ReadError is raised when no registered extension
    matches `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    unpacker = format_info[1]
    extra_kwargs = dict(format_info[2])
    unpacker(filename, extract_dir, **extra_kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +02001164
Éric Araujoe4d5b8e2011-08-08 16:51:11 +02001165
# disk_usage() is only defined (and exported) on platforms that provide a
# way to query it: os.statvfs() where available, otherwise the nt module
# on Windows.
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        st = os.statvfs(path)
        # 'free' is based on f_bavail while 'used' is derived from f_bfree,
        # so used + free may be smaller than total.
        free = st.f_bavail * st.f_frsize
        total = st.f_blocks * st.f_frsize
        used = (st.f_blocks - st.f_bfree) * st.f_frsize
        return _ntuple_diskusage(total, used, free)

elif _WINDOWS:

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    # Same field documentation as on the statvfs-based platforms.
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total, free = nt._getdiskusage(path)
        used = total - free
        return _ntuple_diskusage(total, used, free)
Sandro Tosid902a142011-08-22 23:28:27 +02001200
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001201
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group may each be given as a uid/gid or as a user/group name;
    names are converted to their respective uid/gid.  At least one of them
    must be provided, otherwise ValueError is raised.  LookupError is
    raised for a name that cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system username; anything else is used as the uid
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001232
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    Each dimension is first looked up in the environment: COLUMNS for the
    width and LINES for the height.  A value that is defined and is a
    positive integer is used as is.

    For any dimension not obtained that way, which is the common case,
    the terminal connected to sys.__stdout__ is queried by invoking
    os.get_terminal_size.

    If that query fails, either because the system doesn't support
    querying or because we are not connected to a terminal, the value
    given in the fallback parameter is used.  Fallback defaults to
    (80, 24) which is the default size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    def _from_environment(variable):
        # 0 stands for "not usable"; it triggers the terminal query below.
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_environment('COLUMNS')
    lines = _from_environment('LINES')

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # stdout is None, closed, detached, or not a terminal, or
            # os.get_terminal_size() is unsupported
            size = os.terminal_size(fallback)
        columns = columns if columns > 0 else size.columns
        lines = lines if lines > 0 else size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001277
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate a command: return the path to `cmd` that conforms to the
    given mode on the search path, or None if there is no such file.

    `mode` defaults to os.F_OK | os.X_OK.  `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    """
    def _usable(candidate, mode):
        # The file must exist and be accessible with the requested mode.
        # Directories are rejected too, as on Windows they pass the
        # os.access check.
        return (os.path.exists(candidate) and os.access(candidate, mode)
                and not os.path.isdir(candidate))

    # A command with a directory part is looked up directly rather than on
    # the search path.  This covers paths relative to the current
    # directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # If cmd already ends with one of the expected extensions (e.g.
        # "python.exe"), test only that name; otherwise try every
        # extension in turn.
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # the same directory listed twice is searched only once
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None