blob: 464ee912f5ccd8b048d155c195ece75b70bd6b40 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Serhiy Storchaka20cdffd2016-12-16 18:58:33 +020013
14try:
15 import zlib
16 del zlib
17 _ZLIB_SUPPORTED = True
18except ImportError:
19 _ZLIB_SUPPORTED = False
Tarek Ziadé396fad72010-02-23 05:30:31 +000020
21try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000022 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010023 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000024 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040025except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000026 _BZ2_SUPPORTED = False
27
28try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030029 import lzma
30 del lzma
31 _LZMA_SUPPORTED = True
32except ImportError:
33 _LZMA_SUPPORTED = False
34
35try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000036 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040037except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000038 getpwnam = None
39
40try:
41 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040042except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000043 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000044
# Names exported by "from shutil import *".
# disk_usage is added later, if available on the platform
__all__ = [
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    "move",
    "rmtree",
    "Error",
    "SpecialFileError",
    "ExecError",
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    "get_unpack_formats",
    "register_unpack_format",
    "unregister_unpack_format",
    "unpack_archive",
    "ignore_patterns",
    "chown",
    "which",
    "get_terminal_size",
    "SameFileError",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000054
class Error(OSError):
    """Error raised by copytree() and move(); base class of SameFileError."""
Guido van Rossumc6360141990-10-13 19:23:40 +000057
class SameFileError(Error):
    """Signals that the source and the destination are one and the same file."""
60
class SpecialFileError(OSError):
    """Signals an operation (such as copying) that cannot be performed on a
    special file (such as a named pipe)."""
64
class ExecError(OSError):
    """Signals that a command could not be executed."""
67
class ReadError(OSError):
    """Signals that an archive cannot be read."""
70
class RegistryError(Exception):
    """Signals a failed operation on the archiving or unpacking
    registries."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000074
75
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    The data is read with fsrc.read(length) and written with fdst.write()
    one chunk at a time, until a read returns an empty (false) result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
83
Johannes Gijsbers46f14592004-08-14 13:30:02 +000084def _samefile(src, dst):
85 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000086 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000087 try:
88 return os.path.samefile(src, dst)
89 except OSError:
90 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000091
92 # All other platforms: check for same pathname.
93 return (os.path.normcase(os.path.abspath(src)) ==
94 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000095
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the contents of the file src to the file dst and return dst.

    When follow_symlinks is false and src is a symbolic link, dst is
    created as a new symbolic link with the same target instead of
    receiving a copy of the file src points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if follow_symlinks or not os.path.islink(src):
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    else:
        # Recreate the link itself rather than copying its target.
        os.symlink(os.readlink(src), dst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000124
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    Symlinks are not followed if and only if follow_symlinks is false and
    both `src` and `dst` are symlinks; in that case the mode of the link
    itself is copied with `lchmod`.  Where the needed function (`lchmod`,
    or `chmod` in the regular case) isn't available, e.g. `lchmod` on
    Linux, nothing is done.

    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        read_stat = os.lstat
        apply_mode = getattr(os, 'lchmod', None)
    else:
        read_stat = os.stat
        apply_mode = getattr(os, 'chmod', None)

    if apply_mode is None:
        # The platform cannot change the mode of this kind of file.
        return

    src_stat = read_stat(src)
    apply_mode(dst, stat.S_IMODE(src_stat.st_mode))
145
Larry Hastingsad5ae042012-07-14 17:55:11 -0700146if hasattr(os, 'listxattr'):
Larry Hastingsb4038062012-07-15 10:57:38 -0700147 def _copyxattr(src, dst, *, follow_symlinks=True):
Larry Hastingsad5ae042012-07-14 17:55:11 -0700148 """Copy extended filesystem attributes from `src` to `dst`.
149
150 Overwrite existing attributes.
151
Larry Hastingsb4038062012-07-15 10:57:38 -0700152 If `follow_symlinks` is false, symlinks won't be followed.
Larry Hastingsad5ae042012-07-14 17:55:11 -0700153
154 """
155
Hynek Schlawack0beab052013-02-05 08:22:44 +0100156 try:
157 names = os.listxattr(src, follow_symlinks=follow_symlinks)
158 except OSError as e:
159 if e.errno not in (errno.ENOTSUP, errno.ENODATA):
160 raise
161 return
162 for name in names:
Larry Hastingsad5ae042012-07-14 17:55:11 -0700163 try:
Larry Hastingsb4038062012-07-15 10:57:38 -0700164 value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
165 os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
Larry Hastingsad5ae042012-07-14 17:55:11 -0700166 except OSError as e:
167 if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
168 raise
169else:
170 def _copyxattr(*args, **kwargs):
171 pass
172
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied as well (via _copyxattr()).

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    OSError from the underlying os calls propagates, except that a
    chflags() failure with EOPNOTSUPP/ENOTSUP is ignored.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        # Stand-in for an os function that is missing or unusable here.
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))
    if follow:
        # use the real function if it exists
        def lookup(name):
            return getattr(os, name, _nop)
    else:
        # use the real function only if it exists
        # *and* it supports follow_symlinks
        def lookup(name):
            fn = getattr(os, name, _nop)
            if fn in os.supports_follow_symlinks:
                return fn
            return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
        follow_symlinks=follow)
    # Extended attributes must be copied before the file is (potentially)
    # chmod()'ed read-only, otherwise setxattr() can fail with EACCES.
    _copyxattr(src, dst, follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # if we got a NotImplementedError, it's because
        #   * follow_symlinks=False,
        #   * lchmod() is unavailable, and
        #   * either
        #       * fchmodat() is unavailable or
        #       * fchmodat() doesn't implement AT_SYMLINK_NOFOLLOW.
        #         (it returned ENOTSUP.)
        # therefore we're out of options--we simply cannot chmod the
        # symlink.  give up, suppress the error.
        # (which is what shutil always did in this circumstance.)
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Ignore "operation not supported"; re-raise anything else.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
Antoine Pitrou424246f2012-05-12 19:02:01 +0200226
def copy(src, dst, *, follow_symlinks=True):
    """Copy file data and permission bits, like "cp src dst".

    Returns the path of the file that was written.

    If dst is a directory, the file is created inside it under the
    base name of src.

    When follow_symlinks is false, symlinks are not followed, which
    resembles GNU's "cp -P src dst".

    A SameFileError is raised when source and destination are the same
    file.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000244
def copy2(src, dst, *, follow_symlinks=True):
    """Copy file data and all stat info, like "cp -p src dst".

    Returns the path of the file that was written.

    If dst is a directory, the file is created inside it under the
    base name of src.

    When follow_symlinks is false, symlinks are not followed, which
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000260
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        matched = set()
        for glob in patterns:
            matched.update(fnmatch.filter(names, glob))
        return matched
    return _ignore_patterns
272
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination directory.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons,
    each one a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception; the flag applies to the whole tree,
    subdirectories included. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  The test
                    # is made on the link itself (exists() follows it) so
                    # that a relative target is resolved against the link's
                    # directory and not the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur; the copy function will
                    # raise an error for a dangling link
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000361
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200362# version vulnerable to race conditions
363def _rmtree_unsafe(path, onerror):
Christian Heimes9bd667a2008-01-20 15:14:11 +0000364 try:
365 if os.path.islink(path):
366 # symlinks to directories are forbidden, see bug #1669
367 raise OSError("Cannot call rmtree on a symbolic link")
368 except OSError:
369 onerror(os.path.islink, path, sys.exc_info())
370 # can't continue even if onerror hook returns
371 return
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000372 names = []
373 try:
374 names = os.listdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200375 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000376 onerror(os.listdir, path, sys.exc_info())
377 for name in names:
378 fullname = os.path.join(path, name)
379 try:
380 mode = os.lstat(fullname).st_mode
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200381 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000382 mode = 0
383 if stat.S_ISDIR(mode):
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200384 _rmtree_unsafe(fullname, onerror)
Barry Warsaw234d9a92003-01-24 17:36:15 +0000385 else:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000386 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200387 os.unlink(fullname)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200388 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200389 onerror(os.unlink, fullname, sys.exc_info())
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000390 try:
391 os.rmdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200392 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000393 onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000394
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200395# Version using fd-based APIs to protect against races
396def _rmtree_safe_fd(topfd, path, onerror):
397 names = []
398 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200399 names = os.listdir(topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100400 except OSError as err:
401 err.filename = path
Hynek Schlawack2100b422012-06-23 20:28:32 +0200402 onerror(os.listdir, path, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200403 for name in names:
404 fullname = os.path.join(path, name)
405 try:
Hynek Schlawacka75cd1c2012-06-28 12:07:29 +0200406 orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200407 mode = orig_st.st_mode
Hynek Schlawackb5501102012-12-10 09:11:25 +0100408 except OSError:
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200409 mode = 0
410 if stat.S_ISDIR(mode):
411 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200412 dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100413 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200414 onerror(os.open, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200415 else:
416 try:
417 if os.path.samestat(orig_st, os.fstat(dirfd)):
418 _rmtree_safe_fd(dirfd, fullname, onerror)
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200419 try:
420 os.rmdir(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100421 except OSError:
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200422 onerror(os.rmdir, fullname, sys.exc_info())
Hynek Schlawackb5501102012-12-10 09:11:25 +0100423 else:
424 try:
425 # This can only happen if someone replaces
426 # a directory with a symlink after the call to
427 # stat.S_ISDIR above.
428 raise OSError("Cannot call rmtree on a symbolic "
429 "link")
430 except OSError:
431 onerror(os.path.islink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200432 finally:
433 os.close(dirfd)
434 else:
435 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200436 os.unlink(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100437 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200438 onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200439
Hynek Schlawackd0f6e0a2012-06-29 08:28:20 +0200440_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
441 os.supports_dir_fd and
442 os.listdir in os.supports_fd and
443 os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000444
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    A symbolic link passed as `path` is not removed; it is reported as an
    error instead.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block, so a bare raise
            # re-raises the exception currently being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat).
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000499
Christian Heimesada8c3b2008-03-18 18:26:33 +0000500def _basename(path):
501 # A basename() variant which first strips the trailing slash, if present.
502 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200503 sep = os.path.sep + (os.path.altsep or '')
504 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000505
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist, otherwise an Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    An Error is also raised when a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as on every other path.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then delete.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000561
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000562def _destinsrc(src, dst):
Berker Peksag3715da52014-09-18 05:11:15 +0300563 src = os.path.abspath(src)
564 dst = os.path.abspath(dst)
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000565 if not src.endswith(os.path.sep):
566 src += os.path.sep
567 if not dst.endswith(os.path.sep):
568 dst += os.path.sep
569 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000570
def _get_gid(name):
    """Return the gid of the group called `name`.

    None is returned when no name is given, when getgrnam is not
    available, or when the lookup fails with KeyError."""
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
582
def _get_uid(name):
    """Return the uid of the user called `name`.

    None is returned when no name is given, when getpwnam is not
    available, or when the lookup fails with KeyError."""
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
594
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Build a tar archive, optionally compressed, of everything under
    'base_dir' and return its filename.

    'compress' selects the compression: "gzip" (the default), "bzip2",
    "xz", or None for a plain tar file.  A ValueError is raised for any
    other value, and for a method whose module could not be imported.

    'owner' and 'group' name the user and group recorded for the archive
    members.  Where they are not given, or cannot be resolved to an id,
    the ownership information of the members is left as it is.

    The archive is written to 'base_name' + ".tar" followed by the
    extension of the compression, if any (".gz", ".bz2", or ".xz"); the
    directory meant to hold it is created if it is missing.

    With 'dry_run' set, nothing is created on disk.  'logger', if given,
    receives info() messages.  'verbose' is not used here.
    """
    if compress is None:
        tar_compression = ''
    elif _ZLIB_SUPPORTED and compress == 'gzip':
        tar_compression = 'gz'
    elif _BZ2_SUPPORTED and compress == 'bzip2':
        tar_compression = 'bz2'
    elif _LZMA_SUPPORTED and compress == 'xz':
        tar_compression = 'xz'
    else:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    import tarfile  # late import for breaking circular dependency

    if compress:
        compress_ext = '.' + tar_compression
    else:
        compress_ext = ''
    archive_name = base_name + '.tar' + compress_ext
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tar.add() filter: stamp the requested owner/group on each member.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()

    return archive_name
659
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Archive everything under 'base_dir' into a deflate-compressed zip file.

    The archive is named 'base_name' + ".zip"; its directory is created if
    it does not exist.  'base_dir' itself (unless it is the current
    directory), every sub-directory and every regular file found by
    walking it are added under their normalized paths.

    When 'dry_run' is true nothing is created or written.  'logger', if
    given, receives an info message for each step.  'verbose' is accepted
    but not used.

    Returns the name of the zip file.
    """
    import zipfile  # late import for breaking circular dependency

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if dry_run:
        return zip_filename

    with zipfile.ZipFile(zip_filename, "w",
                         compression=zipfile.ZIP_DEFLATED) as zf:

        def add_entry(entry):
            # Store the entry under its own (normalized) path, then log it.
            zf.write(entry, entry)
            if logger is not None:
                logger.info("adding '%s'", entry)

        top = os.path.normpath(base_dir)
        if top != os.curdir:
            add_entry(top)

        for dirpath, dirnames, filenames in os.walk(base_dir):
            # Directories are added explicitly, in sorted order.
            for name in sorted(dirnames):
                add_entry(os.path.normpath(os.path.join(dirpath, name)))
            for name in filenames:
                candidate = os.path.normpath(os.path.join(dirpath, name))
                # Only regular files (or links to them) are archived.
                if os.path.isfile(candidate):
                    add_entry(candidate)

    return zip_filename
703
# Registry of archive creators:
#   format name -> (function, extra_args, description)
# Plain tar needs no compression module, so it is always registered.
_ARCHIVE_FORMATS = {
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
}

# Both gzip'ed tarballs and zip files (written with ZIP_DEFLATED) are only
# offered when zlib could be imported.
if _ZLIB_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
        zip=(_make_zipfile, [], "ZIP file"),
    )

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"),
    )
720
def get_archive_formats():
    """Return the registered archive formats, sorted by name.

    The result is a list of (name, description) tuples, one for every
    entry of the archive format registry.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
730
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap the operand in a 1-tuple: a bare tuple passed as `function`
        # would otherwise be consumed as the % arguments and produce an
        # unrelated formatting error instead of this message.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
751
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _ARCHIVE_FORMATS.pop(name)
754
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar", or "xztar".  Or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.  They are passed to the archiving
    function of every format except "zip".

    Raises ValueError if 'format' is not a registered archive format.  The
    format is validated before the working directory is touched, so a bad
    format never leaves the process inside 'root_dir'.
    """
    # Resolve the format first: failing here must not have side effects.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Pin base_name to the original working directory before leaving it.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000808
809
def get_unpack_formats():
    """Return the registered unpack formats, sorted by name.

    The result is a list of (name, extensions, description) tuples, one
    for every entry of the unpack format registry.
    """
    return sorted((name, entry[0], entry[3])
                  for name, entry in _UNPACK_FORMATS.items())
820
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of `extensions` is already claimed by a
    registered unpack format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # Map every extension already claimed to the format that owns it.
    owners = {ext: fmt
              for fmt, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension not in owners:
            continue
        raise RegistryError('%s is already registered for "%s"'
                            % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
837
838
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of the file name extensions that identify the
    format.

    `function` is the callable used to unpack archives; it is called with
    the archive file name and the extraction directory.  If it is unable
    to handle an archive, it needs to raise a ReadError exception.

    `extra_args`, if provided, is a sequence of (name, value) tuples that
    are passed to the callable as keyword arguments.  `description`
    describes the format and is returned by get_unpack_formats().

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
860
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
864
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created as needed.  A `path`
    without a directory component refers to the current directory, which
    needs no creation.  Raises OSError (e.g. FileExistsError) if the
    parent exists but is not a directory.
    """
    dirname = os.path.dirname(path)
    # An empty dirname would make os.makedirs('') fail with
    # FileNotFoundError even though there is nothing to create.
    if dirname:
        # exist_ok=True replaces the isdir()-then-makedirs() sequence, which
        # could fail if the directory appeared between the two calls.
        os.makedirs(dirname, exist_ok=True)
870
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` below `extract_dir`.

    Members whose name starts with '/' or contains '..' anywhere are
    skipped.  Raises ReadError if `filename` is not a zip file.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            name = member.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            # Zip member names use '/' whatever the platform.
            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # Directory entry: ensuring the directory was enough.
                continue

            # Regular member: read it whole, then write it out.
            payload = archive.read(member.filename)
            try:
                with open(target, 'wb') as out:
                    out.write(payload)
            finally:
                del payload
904
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` (plain, .gz, .bz2 or .xz) into
    `extract_dir`.

    Raises ReadError if tarfile cannot open `filename` as a tar archive.
    """
    import tarfile  # late import for breaking circular dependency

    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    # The archive is closed whether or not extraction succeeds.
    with archive:
        archive.extractall(extract_dir)
918
# Registry of unpackers:
#   format name -> (extensions, function, extra_args, description)
# Plain tar and zip are always registered.
_UNPACK_FORMATS = {
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# Compressed tarballs are only offered when the matching compression
# module could be imported.
if _ZLIB_SUPPORTED:
    _UNPACK_FORMATS.update(
        gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    )

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
               "bzip2'ed tar-file"),
    )

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update(
        xztar=(['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file"),
    )
935
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is none.
    """
    for fmt, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return fmt
    return None
942
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar", "bztar",
    or "xztar".  Or any other registered format.  If not provided, the
    format is deduced from the extension of `filename`, using the
    extensions of the registered unpackers.

    Raises ValueError if `format` is given but not registered, and
    ReadError if `format` is omitted and no registered unpacker matches
    the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    # Accept path-like objects for both locations.
    extract_dir = os.fspath(extract_dir)
    filename = os.fspath(filename)

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format)) from None

    # format_info is (extensions, function, extra_args, description).
    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200981
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200982
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with the attributes 'total', 'used'
        and 'free', all expressed in bytes.  They are derived from
        os.statvfs(): 'total' from f_blocks, 'used' from f_blocks minus
        f_bfree, and 'free' from f_bavail, each multiplied by f_frsize.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit,
        )

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with the attributes 'total', 'used'
        and 'free', all expressed in bytes.  'used' is computed as the
        difference between the total and free values reported by
        nt._getdiskusage().
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +02001018
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001019
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    `user` may be a numeric uid or a user name; `group` may be a numeric
    gid or a group name.  Names are converted to the matching uid/gid.
    An argument left as None keeps the current value unchanged.

    Raises ValueError if neither `user` nor `group` is given, and
    LookupError if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 is passed to os.chown() for an id that must not change.
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system user name; anything else is used as the uid
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # anything that is not an int is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001050
def get_terminal_size(fallback=(80, 24)):
    """Return the size of the terminal window as an os.terminal_size.

    Each dimension is first looked up in the environment: COLUMNS for the
    width and LINES for the height.  A variable is used when it is defined
    and holds a positive integer.

    For any dimension not settled that way, the terminal connected to
    sys.__stdout__ is queried through os.get_terminal_size().  If that
    query fails (stdout is None, closed, detached or not a terminal, or
    querying is unsupported), the corresponding value of `fallback` is
    used.  `fallback` defaults to (80, 24).
    """
    def _from_env(variable):
        # 0 stands for "missing or not an integer".
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_env('COLUMNS')
    lines = _from_env('LINES')

    # Both dimensions come from the environment: no need to query.
    if columns > 0 and lines > 0:
        return os.terminal_size((columns, lines))

    try:
        queried = os.get_terminal_size(sys.__stdout__.fileno())
    except (AttributeError, ValueError, OSError):
        # stdout is None, closed, detached, or not a terminal, or
        # os.get_terminal_size() is unsupported
        queried = os.terminal_size(fallback)

    if columns <= 0:
        columns = queried.columns
    if lines <= 0:
        lines = queried.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001095
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate the command `cmd` and return its path, or None if no file
    matching `mode` is found.

    `mode` is the access mode checked with os.access(); it defaults to
    os.F_OK | os.X_OK.  `path` is the search path, a string of directories
    separated by os.pathsep; it defaults to the PATH environment variable,
    or os.defpath when PATH is not set.

    A `cmd` that contains a directory part is checked directly and the
    search path is ignored.
    """
    def _usable(candidate):
        # Directories are rejected explicitly, as on Windows they pass the
        # os.access check.
        return (os.path.exists(candidate) and os.access(candidate, mode)
                and not os.path.isdir(candidate))

    # A command with a directory part is looked up as-is, without using the
    # search path. This includes paths relative to the current directory,
    # e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _usable(cmd) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT lists the suffixes to try on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # When cmd already ends with one of those suffixes (e.g.
        # "python.exe"), only that exact name is tried; otherwise every
        # suffix is appended in turn.
        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms there is no PATHEXT equivalent, so cmd is
        # looked up as given.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        # Skip directories already searched (compared case-normalized).
        key = os.path.normcase(directory)
        if key in visited:
            continue
        visited.add(key)
        for filename in candidates:
            full_name = os.path.join(directory, filename)
            if _usable(full_name):
                return full_name
    return None