# blob: 3f4b6bf663ffbbde199aa5377637ba7ceb0066a7
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""

import os
import sys
import stat
import fnmatch
import collections
import errno
import tarfile

# Probe for the optional compression modules.  Only their availability is
# recorded, so each module name is deleted again right after the import.
try:
    import bz2
    del bz2
    _BZ2_SUPPORTED = True
except ImportError:
    _BZ2_SUPPORTED = False

try:
    import lzma
    del lzma
    _LZMA_SUPPORTED = True
except ImportError:
    _LZMA_SUPPORTED = False

# pwd/grp may not be importable; fall back to None so that the name
# lookups can test for availability.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None

__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which", "get_terminal_size",
           "SameFileError"]
# disk_usage is added later, if available on the platform

class Error(OSError):
    """Raised when an operation of this module fails (e.g. by copytree()
    and move())."""

class SameFileError(Error):
    """Raised when source and destination are the same file."""

class SpecialFileError(OSError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""

class ExecError(OSError):
    """Raised when a command could not be executed"""

class ReadError(OSError):
    """Raised when an archive cannot be read"""

class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails"""


def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is requested from fsrc.read() in chunks of `length` and handed to
    fdst.write() until read() returns an empty (falsy) result.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        fdst.write(buf)

Johannes Gijsbers46f14592004-08-14 13:30:02 +000078def _samefile(src, dst):
79 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000080 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000081 try:
82 return os.path.samefile(src, dst)
83 except OSError:
84 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000085
86 # All other platforms: check for same pathname.
87 return (os.path.normcase(os.path.abspath(src)) ==
88 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000089
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy data from src to dst and return dst.

    If follow_symlinks is not set and src is a symbolic link, a new
    symlink will be created instead of copying the file it points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    if not follow_symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
    else:
        # src is opened before dst, so a missing source fails before the
        # destination is created or truncated.
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    return dst

def copymode(src, dst, *, follow_symlinks=True):
    """Copy mode bits from src to dst.

    If follow_symlinks is not set, symlinks aren't followed if and only
    if both `src` and `dst` are symlinks.  If `lchmod` isn't available
    (e.g. Linux) this method does nothing.

    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        if hasattr(os, 'lchmod'):
            stat_func, chmod_func = os.lstat, os.lchmod
        else:
            # No way to change the mode of the link itself.
            return
    elif hasattr(os, 'chmod'):
        stat_func, chmod_func = os.stat, os.chmod
    else:
        return

    st = stat_func(src)
    chmod_func(dst, stat.S_IMODE(st.st_mode))

Larry Hastingsad5ae042012-07-14 17:55:11 -0700140if hasattr(os, 'listxattr'):
Larry Hastingsb4038062012-07-15 10:57:38 -0700141 def _copyxattr(src, dst, *, follow_symlinks=True):
Larry Hastingsad5ae042012-07-14 17:55:11 -0700142 """Copy extended filesystem attributes from `src` to `dst`.
143
144 Overwrite existing attributes.
145
Larry Hastingsb4038062012-07-15 10:57:38 -0700146 If `follow_symlinks` is false, symlinks won't be followed.
Larry Hastingsad5ae042012-07-14 17:55:11 -0700147
148 """
149
Hynek Schlawack0beab052013-02-05 08:22:44 +0100150 try:
151 names = os.listxattr(src, follow_symlinks=follow_symlinks)
152 except OSError as e:
153 if e.errno not in (errno.ENOTSUP, errno.ENODATA):
154 raise
155 return
156 for name in names:
Larry Hastingsad5ae042012-07-14 17:55:11 -0700157 try:
Larry Hastingsb4038062012-07-15 10:57:38 -0700158 value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
159 os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
Larry Hastingsad5ae042012-07-14 17:55:11 -0700160 except OSError as e:
161 if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
162 raise
163else:
164 def _copyxattr(*args, **kwargs):
165 pass
166
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    Extended attributes are copied as well, via _copyxattr().

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        # Stand-in for os functions that are missing or unusable here.
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))
    if follow:
        # use the real function if it exists
        def lookup(name):
            return getattr(os, name, _nop)
    else:
        # use the real function only if it exists
        # *and* it supports follow_symlinks
        def lookup(name):
            fn = getattr(os, name, _nop)
            if fn in os.supports_follow_symlinks:
                return fn
            return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
        follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # if we got a NotImplementedError, it's because
        #   * follow_symlinks=False,
        #   * lchmod() is unavailable, and
        #   * either
        #       * fchmodat() is unavailable or
        #       * fchmodat() doesn't implement AT_SYMLINK_NOFOLLOW.
        #         (it returned ENOTSUP.)
        # therefore we're out of options--we simply cannot chmod the
        # symlink.  give up, suppress the error.
        # (which is what shutil always did in this circumstance.)
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Only "operation not supported" errors are tolerated; the
            # for/else re-raises anything else.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)

def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst, follow_symlinks=follow_symlinks)
    copymode(src, dst, follow_symlinks=follow_symlinks)
    return dst

def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    if os.path.isdir(dst):
        dst = os.path.join(dst, os.path.basename(src))
    copyfile(src, dst, follow_symlinks=follow_symlinks)
    copystat(src, dst, follow_symlinks=follow_symlinks)
    return dst

def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.

    Returns a callable taking (path, names) that returns the set of
    names matching at least one of the patterns."""
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            ignored_names.extend(fnmatch.filter(names, pattern))
        return set(ignored_names)
    return _ignore_patterns

def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to every level of
    the tree. Notice that this has no effect on platforms that don't
    support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  The check
                    # goes through the link itself so that a relative target
                    # is resolved against the link's directory rather than
                    # the current working directory.
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst

Hynek Schlawack67be92b2012-06-23 17:58:42 +0200356# version vulnerable to race conditions
357def _rmtree_unsafe(path, onerror):
Christian Heimes9bd667a2008-01-20 15:14:11 +0000358 try:
359 if os.path.islink(path):
360 # symlinks to directories are forbidden, see bug #1669
361 raise OSError("Cannot call rmtree on a symbolic link")
362 except OSError:
363 onerror(os.path.islink, path, sys.exc_info())
364 # can't continue even if onerror hook returns
365 return
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000366 names = []
367 try:
368 names = os.listdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200369 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000370 onerror(os.listdir, path, sys.exc_info())
371 for name in names:
372 fullname = os.path.join(path, name)
373 try:
374 mode = os.lstat(fullname).st_mode
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200375 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000376 mode = 0
377 if stat.S_ISDIR(mode):
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200378 _rmtree_unsafe(fullname, onerror)
Barry Warsaw234d9a92003-01-24 17:36:15 +0000379 else:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000380 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200381 os.unlink(fullname)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200382 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200383 onerror(os.unlink, fullname, sys.exc_info())
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000384 try:
385 os.rmdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200386 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000387 onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000388
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200389# Version using fd-based APIs to protect against races
390def _rmtree_safe_fd(topfd, path, onerror):
391 names = []
392 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200393 names = os.listdir(topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100394 except OSError as err:
395 err.filename = path
Hynek Schlawack2100b422012-06-23 20:28:32 +0200396 onerror(os.listdir, path, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200397 for name in names:
398 fullname = os.path.join(path, name)
399 try:
Hynek Schlawacka75cd1c2012-06-28 12:07:29 +0200400 orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200401 mode = orig_st.st_mode
Hynek Schlawackb5501102012-12-10 09:11:25 +0100402 except OSError:
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200403 mode = 0
404 if stat.S_ISDIR(mode):
405 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200406 dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100407 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200408 onerror(os.open, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200409 else:
410 try:
411 if os.path.samestat(orig_st, os.fstat(dirfd)):
412 _rmtree_safe_fd(dirfd, fullname, onerror)
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200413 try:
414 os.rmdir(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100415 except OSError:
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200416 onerror(os.rmdir, fullname, sys.exc_info())
Hynek Schlawackb5501102012-12-10 09:11:25 +0100417 else:
418 try:
419 # This can only happen if someone replaces
420 # a directory with a symlink after the call to
421 # stat.S_ISDIR above.
422 raise OSError("Cannot call rmtree on a symbolic "
423 "link")
424 except OSError:
425 onerror(os.path.islink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200426 finally:
427 os.close(dirfd)
428 else:
429 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200430 os.unlink(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100431 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200432 onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200433
Hynek Schlawackd0f6e0a2012-06-29 08:28:20 +0200434_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
435 os.supports_dir_fd and
436 os.listdir in os.supports_fd and
437 os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000438
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions

Christian Heimesada8c3b2008-03-18 18:26:33 +0000494def _basename(path):
495 # A basename() variant which first strips the trailing slash, if present.
496 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200497 sep = os.path.sep + (os.path.altsep or '')
498 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000499
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here as well, as documented.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed; fall back to copy-then-delete.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst

Benjamin Peterson247a9b82009-02-20 04:09:19 +0000556def _destinsrc(src, dst):
Berker Peksag3715da52014-09-18 05:11:15 +0300557 src = os.path.abspath(src)
558 dst = os.path.abspath(dst)
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000559 if not src.endswith(os.path.sep):
560 src += os.path.sep
561 if not dst.endswith(os.path.sep):
562 dst += os.path.sep
563 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000564
565def _get_gid(name):
566 """Returns a gid, given a group name."""
567 if getgrnam is None or name is None:
568 return None
569 try:
570 result = getgrnam(name)
571 except KeyError:
572 result = None
573 if result is not None:
574 return result[2]
575 return None
576
577def _get_uid(name):
578 """Returns an uid, given a user name."""
579 if getpwnam is None or name is None:
580 return None
581 try:
582 result = getpwnam(name)
583 except KeyError:
584 result = None
585 if result is not None:
586 return result[2]
587 return None
588
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None.
    A ValueError is raised for any other value, and for "bzip2"/"xz" when
    the corresponding module is not available.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'verbose' is accepted but not used by this function.  If 'dry_run' is
    true, nothing is created on disk.  'logger', if given, receives info
    messages.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if _LZMA_SUPPORTED:
        tar_compression['xz'] = 'xz'
        compress_ext['xz'] = '.xz'

    # reject unknown or unavailable compression formats
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            # close() explicitly, even when add() fails.
            tar.close()

    return archive_name

def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file is named 'base_name' + ".zip" and is written with
    the "zipfile" module using ZIP_DEFLATED compression.  A missing parent
    directory of 'base_name' is created first.  Directories are stored as
    entries of their own (so empty directories survive), but no entry is
    written for 'base_dir' itself when it is the current directory.

    When 'dry_run' is true nothing is written to disk.  'logger', if given,
    receives info messages about what is created and added.  'verbose' is
    accepted but not used.

    Returns the name of the output zip file.
    """
    import zipfile

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if not dry_run:
        with zipfile.ZipFile(zip_filename, "w",
                             compression=zipfile.ZIP_DEFLATED) as zf:
            path = os.path.normpath(base_dir)
            # Archiving the current directory must not add a "./" entry.
            if path != os.curdir:
                zf.write(path, path)
                if logger is not None:
                    logger.info("adding '%s'", path)
            for dirpath, dirnames, filenames in os.walk(base_dir):
                for name in sorted(dirnames):
                    path = os.path.normpath(os.path.join(dirpath, name))
                    zf.write(path, path)
                    if logger is not None:
                        logger.info("adding '%s'", path)
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    # Skip anything that is not a regular file.
                    if os.path.isfile(path):
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)

    return zip_filename
700
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (name, value) pairs handed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'gztar': (
        _make_tarball,
        [('compress', 'gzip')],
        "gzip'ed tar-file",
    ),
    'tar': (
        _make_tarball,
        [('compress', None)],
        "uncompressed tar file",
    ),
    'zip': (
        _make_zipfile,
        [],
        "ZIP file",
    ),
}

# The remaining tar flavours are registered only when _BZ2_SUPPORTED /
# _LZMA_SUPPORTED are true.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (
        _make_tarball,
        [('compress', 'bzip2')],
        "bzip2'ed tar-file",
    )

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS['xztar'] = (
        _make_tarball,
        [('compress', 'xz')],
        "xz'ed tar-file",
    )
714
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the list is a (name, description) tuple.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
724
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a two-item tuple or
    list.  An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so that a tuple passed by mistake is shown in
        # the message instead of being consumed as the format arguments.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
745
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _ARCHIVE_FORMATS.pop(name)
748
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar" or "xztar" (the last two only when they are registered), or any
    other format added with register_archive_format().

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.  They are passed to the archiver of
    every format except "zip".

    'dry_run' and 'logger' are forwarded to the archiver; 'verbose' is
    accepted but not used.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process stranded inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make the target absolute while the original cwd is still current.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000802
803
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the list is a (name, extensions, description) tuple.
    """
    return sorted((name, entry[0], entry[3])
                  for name, entry in _UNPACK_FORMATS.items())
814
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpack-format registration.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # Map every extension already claimed to the format that claims it.
    claimed = {ext: name
               for name, info in _UNPACK_FORMATS.items()
               for ext in info[0]}

    for extension in extensions:
        if extension in claimed:
            raise RegistryError('%s is already registered for "%s"'
                                % (extension, claimed[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
831
832
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register the unpack format `name`.

    `extensions` is the list of file name extensions that identify the
    format.

    `function` is the callable used to unpack archives; it is called with
    the archive to unpack and must raise ReadError when it cannot handle
    it.

    `extra_args`, if given, is a sequence of (name, value) tuples passed to
    the callable as additional arguments.  `description` describes the
    format and is reported by get_unpack_formats().

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
854
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
858
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created as needed.  Nothing is
    done when `path` has no directory component.
    """
    dirname = os.path.dirname(path)
    # An empty dirname means "current directory"; os.makedirs('') would fail.
    if dirname:
        # exist_ok avoids failing if the directory appears between a
        # separate existence check and its creation.
        os.makedirs(dirname, exist_ok=True)
864
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` below `extract_dir`.

    Members whose name starts with "/" or contains ".." are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: creating it above is all there is to do
                continue

            # Read the member before opening the target for writing.
            payload = archive.read(member_name)
            with open(target, 'wb') as out:
                out.write(payload)
901
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` (tar/tar.gz/tar.bz2/tar.xz)
    into `extract_dir`.

    Raises ReadError if tarfile cannot open `filename`.  The members are
    extracted with TarFile.extractall(); no filtering of member names is
    done here.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    with archive:
        archive.extractall(extract_dir)
914
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'gztar': (
        ['.tar.gz', '.tgz'],
        _unpack_tarfile,
        [],
        "gzip'ed tar-file",
    ),
    'tar': (
        ['.tar'],
        _unpack_tarfile,
        [],
        "uncompressed tar file",
    ),
    'zip': (
        ['.zip'],
        _unpack_zipfile,
        [],
        "ZIP file",
    ),
}

# The remaining tar flavours are registered only when _BZ2_SUPPORTED /
# _LZMA_SUPPORTED are true.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (
        ['.tar.bz2', '.tbz2'],
        _unpack_tarfile,
        [],
        "bzip2'ed tar-file",
    )

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS['xztar'] = (
        ['.tar.xz', '.txz'],
        _unpack_tarfile,
        [],
        "xz'ed tar-file",
    )
928
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format one of whose
    extensions `filename` ends with, or None if there is none.
    """
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
935
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    Raises ValueError if `format` is given but is not a registered format,
    and ReadError if `format` is not given and no registered unpacker
    matches the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is not None:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError(
                "Unknown unpack format '{0}'".format(format)) from None
    else:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]

    # Registry entries are (extensions, function, extra_args, description).
    func = format_info[1]
    kwargs = dict(format_info[2])
    func(filename, extract_dir, **kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200971
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200972
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with the attributes 'total', 'used'
        and 'free', all in bytes.  They are derived from os.statvfs():
        'total' from f_blocks, 'used' from f_blocks - f_bfree and 'free'
        from f_bavail, each multiplied by f_frsize.
        """
        stats = os.statvfs(path)
        unit = stats.f_frsize
        total = stats.f_blocks * unit
        used = (stats.f_blocks - stats.f_bfree) * unit
        free = stats.f_bavail * unit
        return _ntuple_diskusage(total, used, free)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with the attributes 'total', 'used'
        and 'free', all in bytes.  'total' and 'free' come from
        nt._getdiskusage(); 'used' is their difference.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +02001005
Éric Araujo0ac4a5d2011-09-01 08:31:51 +02001006
def chown(path, user=None, group=None):
    """Change the owner user and/or group of `path`.

    `user` and `group` may each be given as a numeric id or as a name; a
    name is converted to the matching uid/gid.  An argument left as None
    is not changed.

    Raises ValueError if both `user` and `group` are None, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is the system username; anything else is passed through
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # unlike `user`, every non-int group is looked up by name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001037
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None, or the platform has
            # no os.get_terminal_size.  ValueError: the stream is closed.
            # OSError: the descriptor is not connected to a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001080
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate `cmd` and return the path of the first match, or None.

    A match is an existing non-directory file that passes
    os.access(..., mode); `mode` defaults to os.F_OK | os.X_OK.

    If `cmd` contains a directory part it is checked directly and the
    search path is not consulted.  Otherwise each directory of `path` is
    tried in order; `path` is a string of directories separated by
    os.pathsep and defaults to os.environ.get("PATH", os.defpath).

    """
    def _usable(candidate):
        # Directories are excluded explicitly, as on Windows they pass
        # the os.access check.
        if not os.path.exists(candidate):
            return False
        return os.access(candidate, mode) and not os.path.isdir(candidate)

    # A command with a directory part (this includes ./script) is looked
    # up as given, never through the search path.
    if os.path.dirname(cmd):
        return cmd if _usable(cmd) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # If cmd already ends with one of the PATHEXT extensions (for
        # example "python.exe") only that name is tried; otherwise every
        # extension is appended in turn.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # Elsewhere there is nothing like PATHEXT, so cmd is used as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # this directory was already searched
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name):
                return full_name
    return None