blob: 61dc8045a9fac55408ce40966c925f0afd1c0a97 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000013import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000014
15try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000016 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010017 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000018 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040019except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000020 _BZ2_SUPPORTED = False
21
22try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030023 import lzma
24 del lzma
25 _LZMA_SUPPORTED = True
26except ImportError:
27 _LZMA_SUPPORTED = False
28
29try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000030 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040031except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000032 getpwnam = None
33
34try:
35 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040036except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000037 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000038
# Names exported by "from shutil import *".
# disk_usage is added later, if available on the platform
__all__ = [
    # file and tree copying / moving / removal
    "copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
    "copytree", "move", "rmtree",
    # exceptions
    "Error", "SpecialFileError", "ExecError",
    # archive creation
    "make_archive", "get_archive_formats",
    "register_archive_format", "unregister_archive_format",
    # archive extraction
    "get_unpack_formats", "register_unpack_format",
    "unregister_unpack_format", "unpack_archive",
    # miscellaneous helpers
    "ignore_patterns", "chown", "which", "get_terminal_size",
    "SameFileError",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000048
class Error(OSError):
    """Generic error raised by the high-level operations of this module
    (e.g. copytree() and move())."""
Guido van Rossumc6360141990-10-13 19:23:40 +000051
class SameFileError(Error):
    """Raised when the source and the destination of a copy are the
    same file."""
54
class SpecialFileError(OSError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
58
class ExecError(OSError):
    """Raised when an external command could not be executed."""
61
class ReadError(OSError):
    """Raised when an archive cannot be read."""
64
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking format
    registries fails."""
68
69
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    The data is read from fsrc in chunks of at most `length` units and
    written to fdst until fsrc.read() returns an empty (false) value.
    """
    while True:
        chunk = fsrc.read(length)
        if not chunk:
            return
        fdst.write(chunk)
77
def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where the platform provides it (an OSError
    from it, e.g. for a missing file, counts as "not the same").  Otherwise
    the normalized absolute path names are compared.
    """
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # No samefile() on this platform: fall back to comparing path names.
        norm_src = os.path.normcase(os.path.abspath(src))
        norm_dst = os.path.normcase(os.path.abspath(dst))
        return norm_src == norm_dst

    try:
        return samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000089
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the data of the file src to the file dst and return dst.

    If follow_symlinks is false and src is a symbolic link, dst is created
    as a new symbolic link with the same target instead of copying the
    file the link points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if follow_symlinks or not os.path.islink(src):
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    else:
        # Recreate the link itself rather than copying its target.
        os.symlink(os.readlink(src), dst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000118
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits from src to dst.

    If follow_symlinks is false and both `src` and `dst` are symbolic
    links, the mode of the links themselves is copied, which requires
    os.lchmod(); where that is unavailable (e.g. Linux) nothing is done.
    In every other case the links are followed.

    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        stat_func, chmod_func = os.lstat, getattr(os, 'lchmod', None)
    else:
        stat_func, chmod_func = os.stat, getattr(os, 'chmod', None)

    if chmod_func is None:
        # The platform offers no way to change the mode: silently give up.
        return

    chmod_func(dst, stat.S_IMODE(stat_func(src).st_mode))
139
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy the extended filesystem attributes of `src` to `dst`.

        Attributes that already exist on `dst` are overwritten.  Errors
        meaning "not supported", "no data" or (while copying a single
        attribute) "not permitted" are ignored; other OSErrors propagate.

        If `follow_symlinks` is false, symlinks won't be followed.

        """
        try:
            names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise

        ignorable = (errno.EPERM, errno.ENOTSUP, errno.ENODATA)
        for name in names:
            try:
                value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
                os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
            except OSError as e:
                if e.errno not in ignorable:
                    raise
else:
    def _copyxattr(*args, **kwargs):
        """No-op stand-in for platforms without extended attributes."""
        pass
166
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied as well, via _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))

    def lookup(name):
        # Use the real os function if it exists.  When operating on the
        # symlinks themselves, it must also support follow_symlinks;
        # otherwise fall back to the no-op.
        fn = getattr(os, name, _nop)
        if follow or fn in os.supports_follow_symlinks:
            return fn
        return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod() could not be applied without following the symlink on
        # this platform.  There is no other way to change the mode of the
        # link itself, so give up and suppress the error (which is what
        # shutil always did in this circumstance).
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Only "operation not supported" is tolerated; not every
            # platform defines both errno names.
            unsupported = [getattr(errno, err)
                           for err in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, err)]
            if why.errno not in unsupported:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200220
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000238
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000254
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns that are used to
    exclude files.  The returned callable takes (path, names) and returns
    the set of names matching at least one of the patterns."""
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns
266
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files (or directories) pointed to
    by symbolic links are copied.  If the file pointed by the symlink
    doesn't exist, an exception will be added in the list of errors
    raised in an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception.  The flag applies to the whole tree,
    including sub-directories.  Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.
                    # os.path.exists() follows the link, so a relative target
                    # is resolved against the link's own directory rather
                    # than the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # Otherwise let the copy occur; copy_function will raise
                    # an error for a dangling link.  A link to a directory
                    # is copied as a tree, since copy_function only handles
                    # files.
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000351
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree at path using path-based os calls.

    Every failure is reported through onerror(func, path, exc_info).
    If path is a symbolic link, the error is reported and nothing is
    removed.
    """
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except OSError:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        fullname = os.path.join(path, entry)
        try:
            is_dir = stat.S_ISDIR(os.lstat(fullname).st_mode)
        except OSError:
            # Can't stat it: treat it as a plain file and let unlink report.
            is_dir = False
        if is_dir:
            _rmtree_unsafe(fullname, onerror)
            continue
        try:
            os.unlink(fullname)
        except OSError:
            onerror(os.unlink, fullname, sys.exc_info())

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000384
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as file descriptor topfd.

    `path` is the name of that directory; it is only used to build the
    names passed to onerror(func, path, exc_info).  The directory itself
    is left for the caller to remove.
    """
    entries = []
    try:
        entries = os.listdir(topfd)
    except OSError as err:
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        fullname = os.path.join(path, entry)
        try:
            orig_st = os.stat(entry, dir_fd=topfd, follow_symlinks=False)
        except OSError:
            # Can't stat it: treat it as a plain file and let unlink report.
            is_dir = False
        else:
            is_dir = stat.S_ISDIR(orig_st.st_mode)

        if not is_dir:
            try:
                os.unlink(entry, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
            continue

        try:
            dirfd = os.open(entry, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, fullname, sys.exc_info())
            continue

        try:
            if os.path.samestat(orig_st, os.fstat(dirfd)):
                # What we opened is what we stat'ed: safe to descend.
                _rmtree_safe_fd(dirfd, fullname, onerror)
                try:
                    os.rmdir(entry, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, fullname, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic "
                                  "link")
                except OSError:
                    onerror(os.path.islink, fullname, sys.exc_info())
        finally:
            os.close(dirfd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200429
# True when the platform offers everything the fd-based rmtree needs:
# dir_fd support in open/stat/unlink/rmdir, listdir() on a file descriptor
# and stat() with follow_symlinks.
_use_fd_functions = (
    os.supports_dir_fd >= {os.open, os.stat, os.unlink, os.rmdir}
    and os.listdir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000434
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block: re-raise the
            # exception currently being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so that onerror handlers can tell the cases apart.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000489
def _basename(path):
    """Return the last component of path, ignoring trailing separators.

    Unlike os.path.basename(), this also yields the directory name for a
    path that ends with a separator.
    """
    separators = os.path.sep
    if os.path.altsep:
        separators += os.path.altsep
    return os.path.basename(path.rstrip(separators))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000495
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Raises Error if the final destination already exists inside a
    destination directory, or if a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        # A symlink that points at dst is also "the same file" as dst, but
        # it must be moved *into* dst like any other source, not renamed
        # onto it.
        if _samefile(src, dst) and not os.path.islink(src):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Honour the documented contract: always return the destination.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then remove.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000551
def _destinsrc(src, dst):
    """Return True if dst is src itself or is located inside src.

    Both paths are made absolute and terminated with a separator before
    comparing, so that "/a/bc" is not taken to be inside "/a/b".
    """
    sep = os.path.sep
    src_dir = os.path.abspath(src)
    dst_dir = os.path.abspath(dst)
    src_dir = src_dir if src_dir.endswith(sep) else src_dir + sep
    dst_dir = dst_dir if dst_dir.endswith(sep) else dst_dir + sep
    return dst_dir.startswith(src_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000560
def _get_gid(name):
    """Return the gid for the group called name.

    None is returned when name is None, when the grp module is not
    available, or when no such group exists.
    """
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
572
def _get_uid(name):
    """Return the uid for the user called name.

    None is returned when name is None, when the pwd module is not
    available, or when no such user exists.
    """
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
584
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None;
    "bzip2" and "xz" are only accepted when the matching module could be
    imported.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    If 'dry_run' is true, nothing is written (neither the archive nor its
    parent directory).  'logger', if given, receives progress messages
    through its info() method.  'verbose' is accepted but not used here.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").

    Returns the output filename.
    """
    # compress name -> (tarfile mode suffix, file name extension)
    formats = {'gzip': ('gz', '.gz')}
    if _BZ2_SUPPORTED:
        formats['bzip2'] = ('bz2', '.bz2')
    if _LZMA_SUPPORTED:
        formats['xz'] = ('xz', '.xz')

    if compress is None:
        tar_mode, extension = '', ''
    elif compress in formats:
        tar_mode, extension = formats[compress]
    else:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + extension
    archive_dir = os.path.dirname(archive_name)

    # Make sure the directory that will hold the archive exists.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: stamp the requested owner/group on every member.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_mode)
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()
    return archive_name
650
Tarek Ziadé396fad72010-02-23 05:30:31 +0000651def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
652 """Create a zip file from all the files under 'base_dir'.
653
Éric Araujo4433a5f2010-12-15 20:26:30 +0000654 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000655 "zipfile" Python module (if available) or the InfoZIP "zip" utility
656 (if installed and found on the default search path). If neither tool is
657 available, raises ExecError. Returns the name of the output zip
658 file.
659 """
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400660 import zipfile
661
Tarek Ziadé396fad72010-02-23 05:30:31 +0000662 zip_filename = base_name + ".zip"
663 archive_dir = os.path.dirname(base_name)
664
Serhiy Storchaka9a4fc192014-11-28 00:48:46 +0200665 if archive_dir and not os.path.exists(archive_dir):
Tarek Ziadé396fad72010-02-23 05:30:31 +0000666 if logger is not None:
667 logger.info("creating %s", archive_dir)
668 if not dry_run:
669 os.makedirs(archive_dir)
670
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400671 if logger is not None:
672 logger.info("creating '%s' and adding '%s' to it",
673 zip_filename, base_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000674
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400675 if not dry_run:
676 with zipfile.ZipFile(zip_filename, "w",
677 compression=zipfile.ZIP_DEFLATED) as zf:
678 for dirpath, dirnames, filenames in os.walk(base_dir):
679 for name in filenames:
680 path = os.path.normpath(os.path.join(dirpath, name))
681 if os.path.isfile(path):
682 zf.write(path, path)
683 if logger is not None:
684 logger.info("adding '%s'", path)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000685
686 return zip_filename
687
# Archive creators keyed by format name.  Each value is a
# (function, extra_args, description) triple; extra_args is a list of
# (keyword, value) pairs that make_archive() passes on to the function.
_ARCHIVE_FORMATS = dict(
    gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    tar=(_make_tarball, [('compress', None)], "uncompressed tar file"),
    zip=(_make_zipfile, [], "ZIP file"),
)

# The compressed tar variants below are registered only when the matching
# compression module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"))

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"))
701
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the list is a (name, description) tuple.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
711
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format.

    'name' is the name of the format.  'function' is the callable that will
    be used to create archives.  If provided, 'extra_args' is a list or
    tuple of (name, value) pairs that will be passed as keyword arguments
    to the callable.  'description' is returned by get_archive_formats().

    Raises TypeError if 'function' is not callable, if 'extra_args' is not
    a list or tuple, or if one of its elements is not a 2-item list/tuple.
    An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so that a tuple passed by mistake is formatted
        # as one value instead of being unpacked as format arguments.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
732
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
735
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar" or "xztar" (the last two only when supported), or any other
    registered format.  An unknown format raises ValueError before the
    working directory is touched.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we chdir into 'root_dir' before creating the archive
    (unless 'dry_run' is set) and restore the previous working directory
    afterwards.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and directories
    in the archive.  'root_dir' and 'base_dir' both default to the current
    directory.

    'owner' and 'group' are passed to every archiver except "zip".
    'logger', if given, receives progress messages.  'verbose' is accepted
    but not used.

    Returns the name of the archive file.
    """
    # Resolve the format first: failing here must not leave the process
    # sitting in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % (format,))

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make the output path absolute before the working directory moves.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000789
790
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the list is a (name, extensions, description) tuple.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
801
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of 'extensions' is already claimed by a
    registered format, and TypeError if 'function' is not callable.
    'extra_args' is accepted but not inspected.
    """
    # Map every extension already claimed to the format that owns it.
    claimed = {ext: fmt
               for fmt, info in _UNPACK_FORMATS.items()
               for ext in info[0]}

    for ext in extensions:
        if ext in claimed:
            raise RegistryError('%s is already registered for "%s"'
                                % (ext, claimed[ext]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
818
819
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format.  `extensions` is a list of file name
    extensions that identify the format.

    `function` is the callable used to unpack archives; it is called with
    the archive file name and the extraction directory.  If it cannot
    handle an archive it should raise ReadError.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as keyword arguments to the callable.  `description` is
    returned by get_unpack_formats().

    The registration is validated by _check_unpack_options() first.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
841
def unregister_unpack_format(name):
    """Remove the unpack format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
845
846def _ensure_directory(path):
847 """Ensure that the parent directory of `path` exists"""
848 dirname = os.path.dirname(path)
849 if not os.path.isdir(dirname):
850 os.makedirs(dirname)
851
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # Refuse absolute paths and anything containing '..'.
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # Directory entry: creating the directory was enough.
                continue

            payload = archive.read(member_name)
            with open(target, 'wb') as out:
                out.write(payload)
888
def _unpack_tarfile(filename, extract_dir):
    """Unpack the tar archive `filename` into `extract_dir`.

    tarfile.open() is called without an explicit mode, leaving it to
    tarfile to recognise the compression (tar, tar.gz, tar.bz2, tar.xz).
    Raises ReadError if tarfile cannot open the file as a tar archive.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    # NOTE(review): extractall() is given no member filtering here; confirm
    # callers only pass trusted archives.
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
901
# Unpackers keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple; extra_args is a list
# of (keyword, value) pairs that unpack_archive() passes on to the function.
_UNPACK_FORMATS = dict(
    gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    tar=(['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    zip=(['.zip'], _unpack_zipfile, [], "ZIP file"),
)

# The compressed tar variants below are registered only when the matching
# compression module could be imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
               "bzip2'ed tar-file"))

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update(
        xztar=(['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file"))
915
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None if there is no such format.
    """
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
922
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked.  If not provided, the current working directory is used.

    `format` is the name of a registered unpack format, such as "zip",
    "tar" or "gztar".  If not provided, the format is chosen by matching
    the end of `filename` against the extensions of the registered
    unpackers.

    Raises ValueError if `format` is given but not registered, and
    ReadError if `format` is omitted and no registered extension matches.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # Fall back on the extensions the registered unpackers declare.
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        entry = _UNPACK_FORMATS[format]
    else:
        try:
            entry = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # entry is (extensions, function, extra_args, description).
    unpacker = entry[1]
    unpacker(filename, extract_dir, **dict(entry[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200958
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200959
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', computed from os.statvfs() block counts multiplied by
        the fragment size (f_frsize).  'free' is based on f_bavail, while
        'used' is based on f_blocks - f_bfree.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free'.  'total' and 'free' come from nt._getdiskusage() and
        'used' is their difference.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +0200992
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200993
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    'user' may be a uid or a user name (a str); 'group' may be a gid (an
    int) or a group name.  Names are converted to their uid/gid.  An
    argument left as None keeps the current owner or group.

    Raises ValueError if both 'user' and 'group' are None, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # os.chown() treats -1 as "leave unchanged".
    if user is None:
        uid = -1
    elif isinstance(user, str):
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        # Anything that is not a string is passed through as the uid.
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001024
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked.  If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, because sys.__stdout__ is
    missing or closed, or because we are not connected to a terminal,
    the value given in the fallback parameter is used.  Fallback defaults
    to (80, 24).

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values; 0 means "not known yet"
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, NameError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None or the platform has no
            # os.get_terminal_size; ValueError: the stream is closed;
            # OSError: the descriptor is not connected to a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001067
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate a command and return its path, or None if it is not found.

    `cmd` is looked up in each directory of `path` and the first entry
    that satisfies `mode` is returned.

    `mode` defaults to os.F_OK | os.X_OK.  `path` is a search path string
    whose entries are separated by os.pathsep; it defaults to the PATH
    environment variable, or os.defpath if PATH is not set.

    If `cmd` contains a directory part it is checked directly and the
    search path is not consulted.
    """
    def _usable(candidate):
        # The path must exist, pass the access check and not be a
        # directory (on Windows directories pass the os.access check).
        return (os.path.exists(candidate) and os.access(candidate, mode)
                and not os.path.isdir(candidate))

    # A command with a directory part (e.g. ./script) is checked as given.
    if os.path.dirname(cmd):
        return cmd if _usable(cmd) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT lists the suffixes Windows treats as executable.  When
        # cmd already ends with one of them (e.g. "python.exe") only that
        # exact name is tried; otherwise every suffix is appended in turn.
        extensions = os.environ.get("PATHEXT", "").split(os.pathsep)
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in extensions):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in extensions]
    else:
        # Elsewhere there is no PATHEXT equivalent, so cmd is used as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        # Check each directory once, comparing case-normalised names.
        key = os.path.normcase(directory)
        if key in visited:
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name):
                return full_name
    return None