blob: 90b71986a95767b19bfb23bb61aca3b5b4cfc1c7 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000013import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000014
15try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000016 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010017 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000018 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040019except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000020 _BZ2_SUPPORTED = False
21
22try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030023 import lzma
24 del lzma
25 _LZMA_SUPPORTED = True
26except ImportError:
27 _LZMA_SUPPORTED = False
28
29try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000030 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040031except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000032 getpwnam = None
33
34try:
35 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040036except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000037 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000038
# Public API of this module.
__all__ = [
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    "move",
    "rmtree",
    "Error",
    "SpecialFileError",
    "ExecError",
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    "get_unpack_formats",
    "register_unpack_format",
    "unregister_unpack_format",
    "unpack_archive",
    "ignore_patterns",
    "chown",
    "which",
    "get_terminal_size",
    "SameFileError",
]
# disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000048
class Error(OSError):
    """Error raised by the operations in this module (e.g. copytree, move)."""
Guido van Rossumc6360141990-10-13 19:23:40 +000051
class SameFileError(Error):
    """Signals that the source and the destination name the same file."""
54
class SpecialFileError(OSError):
    """Signals an operation (e.g. copying) that is not supported on a
    special file such as a named pipe."""
58
class ExecError(OSError):
    """Signals that a command could not be executed."""
61
class ReadError(OSError):
    """Signals that an archive cannot be read."""
64
class RegistryError(Exception):
    """Signals a failed registry operation on the archiving or
    unpacking registries."""
Tarek Ziadé6ac91722010-04-28 17:51:36 +000068
69
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    The data is read in chunks of at most `length` and written to fdst
    until fsrc.read() returns an empty (falsy) chunk.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
77
Johannes Gijsbers46f14592004-08-14 13:30:02 +000078def _samefile(src, dst):
79 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000080 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000081 try:
82 return os.path.samefile(src, dst)
83 except OSError:
84 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000085
86 # All other platforms: check for same pathname.
87 return (os.path.normcase(os.path.abspath(src)) ==
88 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000089
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the contents of the file src to the file dst and return dst.

    When follow_symlinks is false and src is a symbolic link, dst is
    created as a new symlink with the same target instead of copying the
    file the link points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either one is a named pipe.
    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    recreate_link = not follow_symlinks and os.path.islink(src)
    if recreate_link:
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000118
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits from src to dst.

    If follow_symlinks is false, the links themselves are used if and
    only if both `src` and `dst` are symlinks.  In that case nothing is
    done when `os.lchmod` is not available (e.g. Linux).  Likewise
    nothing is done when `os.chmod` is unavailable.
    """
    on_links = (not follow_symlinks and os.path.islink(src)
                and os.path.islink(dst))
    if on_links:
        stat_name, chmod_name = 'lstat', 'lchmod'
    else:
        stat_name, chmod_name = 'stat', 'chmod'

    if not hasattr(os, chmod_name):
        return

    st = getattr(os, stat_name)(src)
    getattr(os, chmod_name)(dst, stat.S_IMODE(st.st_mode))
139
Larry Hastingsad5ae042012-07-14 17:55:11 -0700140if hasattr(os, 'listxattr'):
Larry Hastingsb4038062012-07-15 10:57:38 -0700141 def _copyxattr(src, dst, *, follow_symlinks=True):
Larry Hastingsad5ae042012-07-14 17:55:11 -0700142 """Copy extended filesystem attributes from `src` to `dst`.
143
144 Overwrite existing attributes.
145
Larry Hastingsb4038062012-07-15 10:57:38 -0700146 If `follow_symlinks` is false, symlinks won't be followed.
Larry Hastingsad5ae042012-07-14 17:55:11 -0700147
148 """
149
Hynek Schlawack0beab052013-02-05 08:22:44 +0100150 try:
151 names = os.listxattr(src, follow_symlinks=follow_symlinks)
152 except OSError as e:
153 if e.errno not in (errno.ENOTSUP, errno.ENODATA):
154 raise
155 return
156 for name in names:
Larry Hastingsad5ae042012-07-14 17:55:11 -0700157 try:
Larry Hastingsb4038062012-07-15 10:57:38 -0700158 value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
159 os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
Larry Hastingsad5ae042012-07-14 17:55:11 -0700160 except OSError as e:
161 if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
162 raise
163else:
164 def _copyxattr(*args, **kwargs):
165 pass
166
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied as well, via _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.
    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # Operate on the links themselves only when asked not to follow
    # symlinks *and* both paths are symlinks.
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))

    def lookup(name):
        # When following symlinks, use the real os function if it exists.
        # Otherwise use it only if it also supports follow_symlinks; in
        # every remaining case fall back to the no-op.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod could not be applied to the symlink itself
        # (follow_symlinks=False is not implemented for it on this
        # platform).  We are out of options, so give up and suppress the
        # error, which is what shutil always did in this circumstance.
        pass

    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Tolerate "operation not supported", whichever spelling of
            # the errno constant this platform defines.
            tolerated = [getattr(errno, err)
                         for err in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, err)]
            if why.errno not in tolerated:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200220
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    If dst is a directory, the file is copied into it under src's
    basename.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    A SameFileError is raised if source and destination are the same
    file.
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000238
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    If dst is a directory, the file is copied into it under src's
    basename.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000254
def ignore_patterns(*patterns):
    """Build a callable usable as the copytree() `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns
266
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including subdirectories. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.  Test the
                    # link itself (exists() follows it) rather than `linkto`:
                    # a relative target would otherwise be resolved against
                    # the current working directory instead of the link's
                    # own directory.
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate ignore_dangling_symlinks so that the flag also
                # applies below the top-level directory.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000355
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200356# version vulnerable to race conditions
357def _rmtree_unsafe(path, onerror):
Christian Heimes9bd667a2008-01-20 15:14:11 +0000358 try:
359 if os.path.islink(path):
360 # symlinks to directories are forbidden, see bug #1669
361 raise OSError("Cannot call rmtree on a symbolic link")
362 except OSError:
363 onerror(os.path.islink, path, sys.exc_info())
364 # can't continue even if onerror hook returns
365 return
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000366 names = []
367 try:
368 names = os.listdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200369 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000370 onerror(os.listdir, path, sys.exc_info())
371 for name in names:
372 fullname = os.path.join(path, name)
373 try:
374 mode = os.lstat(fullname).st_mode
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200375 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000376 mode = 0
377 if stat.S_ISDIR(mode):
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200378 _rmtree_unsafe(fullname, onerror)
Barry Warsaw234d9a92003-01-24 17:36:15 +0000379 else:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000380 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200381 os.unlink(fullname)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200382 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200383 onerror(os.unlink, fullname, sys.exc_info())
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000384 try:
385 os.rmdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200386 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000387 onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000388
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200389# Version using fd-based APIs to protect against races
390def _rmtree_safe_fd(topfd, path, onerror):
391 names = []
392 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200393 names = os.listdir(topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100394 except OSError as err:
395 err.filename = path
Hynek Schlawack2100b422012-06-23 20:28:32 +0200396 onerror(os.listdir, path, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200397 for name in names:
398 fullname = os.path.join(path, name)
399 try:
Hynek Schlawacka75cd1c2012-06-28 12:07:29 +0200400 orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200401 mode = orig_st.st_mode
Hynek Schlawackb5501102012-12-10 09:11:25 +0100402 except OSError:
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200403 mode = 0
404 if stat.S_ISDIR(mode):
405 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200406 dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100407 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200408 onerror(os.open, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200409 else:
410 try:
411 if os.path.samestat(orig_st, os.fstat(dirfd)):
412 _rmtree_safe_fd(dirfd, fullname, onerror)
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200413 try:
414 os.rmdir(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100415 except OSError:
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200416 onerror(os.rmdir, fullname, sys.exc_info())
Hynek Schlawackb5501102012-12-10 09:11:25 +0100417 else:
418 try:
419 # This can only happen if someone replaces
420 # a directory with a symlink after the call to
421 # stat.S_ISDIR above.
422 raise OSError("Cannot call rmtree on a symbolic "
423 "link")
424 except OSError:
425 onerror(os.path.islink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200426 finally:
427 os.close(dirfd)
428 else:
429 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200430 os.unlink(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100431 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200432 onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200433
Hynek Schlawackd0f6e0a2012-06-29 08:28:20 +0200434_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
435 os.supports_dir_fd and
436 os.listdir in os.supports_fd and
437 os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000438
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an ``except`` block, so a bare
            # raise re-raises the error currently being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so that onerror handlers can tell the cases apart.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000493
Christian Heimesada8c3b2008-03-18 18:26:33 +0000494def _basename(path):
495 # A basename() variant which first strips the trailing slash, if present.
496 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200497 sep = os.path.sep + (os.path.altsep or '')
498 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000499
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist; otherwise an Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Honour the documented contract: return the destination here
            # as well, instead of None.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. a cross-filesystem move): fall back to
        # copying the source and then removing it.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000555
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000556def _destinsrc(src, dst):
Berker Peksag3715da52014-09-18 05:11:15 +0300557 src = os.path.abspath(src)
558 dst = os.path.abspath(dst)
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000559 if not src.endswith(os.path.sep):
560 src += os.path.sep
561 if not dst.endswith(os.path.sep):
562 dst += os.path.sep
563 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000564
565def _get_gid(name):
566 """Returns a gid, given a group name."""
567 if getgrnam is None or name is None:
568 return None
569 try:
570 result = getgrnam(name)
571 except KeyError:
572 result = None
573 if result is not None:
574 return result[2]
575 return None
576
577def _get_uid(name):
578 """Returns an uid, given a user name."""
579 if getpwnam is None or name is None:
580 return None
581 try:
582 result = getpwnam(name)
583 except KeyError:
584 result = None
585 if result is not None:
586 return result[2]
587 return None
588
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None;
    "bzip2" and "xz" are accepted only when the matching module could be
    imported.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").

    If 'dry_run' is true, nothing is written to disk.  'logger', if given,
    receives progress messages through its info() method.

    Returns the output filename.
    """
    # (compress value, tarfile mode suffix, filename extension)
    available = [('gzip', 'gz', '.gz')]
    if _BZ2_SUPPORTED:
        available.append(('bzip2', 'bz2', '.bz2'))
    if _LZMA_SUPPORTED:
        available.append(('xz', 'xz', '.xz'))

    tar_compression = {None: ''}
    compress_ext = {}
    for key, mode_suffix, extension in available:
        tar_compression[key] = mode_suffix
        compress_ext[key] = extension

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # make sure the directory that will hold the archive exists
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # override ownership only for the ids that could be resolved
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()

    return archive_name
654
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip" and is written
    with the "zipfile" module using ZIP_DEFLATED compression.  The parent
    directory of the output file is created if it does not exist.

    'base_dir' itself (unless it is the current directory), every
    directory below it and every regular file below it are added, each
    stored under its normalized path.  If 'logger' is given, progress is
    reported through logger.info().  If 'dry_run' is true, nothing is
    created on disk.  'verbose' is accepted but not used.

    Returns the name of the output zip file.
    """
    import zipfile

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if not dry_run:
        with zipfile.ZipFile(zip_filename, "w",
                             compression=zipfile.ZIP_DEFLATED) as zf:

            def _add(path):
                # Store the entry under the same name it has on disk.
                zf.write(path, path)
                if logger is not None:
                    logger.info("adding '%s'", path)

            top = os.path.normpath(base_dir)
            if top != os.curdir:
                _add(top)
            for dirpath, dirnames, filenames in os.walk(base_dir):
                # Directories get explicit entries so empty ones survive.
                for name in sorted(dirnames):
                    _add(os.path.normpath(os.path.join(dirpath, name)))
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    # skip anything that is not a regular file
                    if os.path.isfile(path):
                        _add(path)

    return zip_filename
701
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple; extra_args is a list of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
}

# Formats that depend on optional compression modules.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (
        _make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file")

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS['xztar'] = (
        _make_tarball, [('compress', 'xz')], "xz'ed tar-file")
715
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Every item of the list is a (name, description) tuple.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
725
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    list or tuple, or if one of its elements is not a two-item list or
    tuple.  An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple: a bare tuple argument would otherwise be
        # unpacked by the % operator and garble (or break) the message.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
746
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
749
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar" or "xztar" (the last two only when the matching compression
    module is available), or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    Raises ValueError if 'format' is not a registered archive format; the
    current working directory is left untouched in that case.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Must be made absolute before the chdir below changes its meaning.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    # Every format except 'zip' receives the ownership arguments.
    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000803
804
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Every item of the list is a (name, extensions, description) tuple.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
815
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of `extensions` already belongs to a
    registered unpack format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # map every extension already claimed to the format that owns it
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension in owners:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
832
833
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is the list of file name extensions that identify the
    format.

    `function` is the callable used to unpack archives of this format.  It
    receives the archive to unpack and must raise a ReadError exception
    when it cannot handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples that
    are passed to the callable as keyword arguments.  `description` is a
    free-form text that get_unpack_formats() reports for the format.

    The registration is validated by _check_unpack_options() before it is
    stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
855
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]
859
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created as well.  Nothing is done
    when `path` has no directory component, since its parent is then the
    current directory.
    """
    dirname = os.path.dirname(path)
    # os.makedirs('') would raise, so a bare file name is left alone;
    # exist_ok avoids failing if the directory appears concurrently.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
865
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name starts with "/" or contains ".." are skipped.
    Names ending in "/" only have their directory created; every other
    member is written out as a file, with parent directories created as
    needed.

    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            name = info.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if not name.endswith('/'):
                # Read the member before opening the target, so a corrupt
                # member does not leave an empty file behind.
                data = archive.read(info.filename)
                with open(target, 'wb') as f:
                    f.write(data)
902
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` into `extract_dir`.

    Plain and compressed (tar.gz, tar.bz2, tar.xz) archives are handled.
    Raises ReadError when `filename` cannot be opened as a tar file.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    # always release the archive, even if extraction fails part-way
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
915
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# Formats that depend on optional compression modules.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (
        ['.tar.bz2', '.tbz2'], _unpack_tarfile, [], "bzip2'ed tar-file")

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS['xztar'] = (
        ['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file")
929
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None if there is none.
    """
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
936
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack the archive `filename`.

    `extract_dir` is the directory the archive is unpacked into; the
    current working directory is used when it is not given.

    `format` names the archive format: "zip", "tar", "gztar" or any other
    registered unpack format.  When it is not given, the format is guessed
    from the extension of `filename` among the registered unpackers.

    Raises ValueError if `format` is given but not registered, and
    ReadError if no registered unpacker matches the file name extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # guess the format from the extensions of the registered unpackers
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    # the registered (name, value) pairs become keyword arguments
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200972
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200973
# disk_usage() is only defined, and only exported, on platforms that offer
# a way to query it: os.statvfs() where available, otherwise the nt module
# on Windows.
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        st = os.statvfs(path)
        # 'free' is computed from f_bavail, 'used' from f_bfree, so the
        # two need not add up to 'total'.
        free = st.f_bavail * st.f_frsize
        total = st.f_blocks * st.f_frsize
        used = (st.f_blocks - st.f_bfree) * st.f_frsize
        return _ntuple_diskusage(total, used, free)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
    _ntuple_diskusage.total.__doc__ = 'Total space in bytes'
    _ntuple_diskusage.used.__doc__ = 'Used space in bytes'
    _ntuple_diskusage.free.__doc__ = 'Free space in bytes'

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total, free = nt._getdiskusage(path)
        used = total - free
        return _ntuple_diskusage(total, used, free)
Sandro Tosid902a142011-08-22 23:28:27 +02001009
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001010
def chown(path, user=None, group=None):
    """Change the owner and/or the group of `path`.

    `user` may be a user name (a string), which is converted to its uid;
    any other value is handed to os.chown() as given.  `group` may be a
    gid (an int), handed over as given; any other value is treated as a
    group name and converted to its gid.  Passing None for one of them
    leaves that id unchanged.

    Raises ValueError if both `user` and `group` are None, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # os.chown() takes -1 to mean "leave this id alone"
    if user is None:
        uid = -1
    elif isinstance(user, str):
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001041
def get_terminal_size(fallback=(80, 24)):
    """Return the size of the terminal window.

    Each dimension is first looked up in the environment: COLUMNS for the
    width and LINES for the height.  A variable is honoured when it is set
    to a positive integer.

    For any dimension not settled that way, the terminal connected to
    sys.__stdout__ is queried with os.get_terminal_size().

    If that query fails (no such function, stdout missing, closed or
    detached, or not a terminal), the `fallback` pair is used instead.  It
    defaults to (80, 24).

    The result is a named tuple of type os.terminal_size.
    """
    def _from_env(variable):
        # 0 stands for "not usable"; it triggers the terminal query below
        try:
            return int(os.environ[variable])
        except (KeyError, ValueError):
            return 0

    columns = _from_env('COLUMNS')
    lines = _from_env('LINES')

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            queried = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            queried = os.terminal_size(fallback)
        if columns <= 0:
            columns = queried.columns
        if lines <= 0:
            lines = queried.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001086
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate the command `cmd` and return its path, or None if no file
    satisfying `mode` is found.

    `mode` is the access mode handed to os.access(); it defaults to
    os.F_OK | os.X_OK.  `path` is a search path string whose entries are
    separated by os.pathsep; when it is None, os.environ["PATH"] is used,
    falling back to os.defpath.

    A `cmd` that contains a directory part is checked directly and the
    search path is ignored.
    """
    def _usable(candidate, wanted):
        # Directories are rejected explicitly because on Windows they
        # pass the os.access() check.
        return (os.path.exists(candidate) and os.access(candidate, wanted)
                and not os.path.isdir(candidate))

    # A command with a directory part (e.g. ./script) is never looked up
    # on the search path.
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # On Windows the PATHEXT extensions have to be tried as well.  If
        # cmd already ends with one of them (e.g. "python.exe"), only that
        # exact name is tested.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # Elsewhere there is no list of executable suffixes; use cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # same directory listed twice on the search path
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None