blob: 0cc74f6b97177af863c3c3e48de9d6045f233640 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
20except ImportError:
21 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
25except ImportError:
26 getpwnam = None
27
28try:
29 from grp import getgrnam
30except ImportError:
31 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Names exported by "from shutil import *".
# SameFileError is listed because copyfile() raises it and callers need to be
# able to catch it by name.
# disk_usage is added later, if available on the platform
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which", "SameFileError"]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(OSError):
    """Base class for the errors raised by this module.

    copytree() raises it with a list of (srcname, dstname, reason) tuples
    as its single argument; move() raises it with a message string.
    """
    pass
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SameFileError(Error):
    """Raised by copyfile() when source and destination are the same file."""
47
class SpecialFileError(OSError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)."""
51
class ExecError(OSError):
    """Raised when a command could not be executed."""
54
class ReadError(OSError):
    """Raised when an archive cannot be read."""
57
class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails."""
61
62
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of the file-like object fsrc to the file-like
    object fdst, asking fsrc for at most `length` units per read() call.

    Copying stops at the first read() that returns an empty (false) value.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
70
def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where it exists (an OSError from it counts as
    "not the same file"); otherwise compares the normalized absolute paths.
    """
    if not hasattr(os.path, 'samefile'):
        # All other platforms: check for same pathname.
        def canonical(name):
            return os.path.normcase(os.path.abspath(name))
        return canonical(src) == canonical(dst)

    # Macintosh, Unix.
    try:
        return os.path.samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000082
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the contents of the file src to the file dst and return dst.

    When follow_symlinks is false and src is a symbolic link, dst is
    created as a symlink with the same target instead of receiving a copy
    of the file the link points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            file_mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(file_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if follow_symlinks or not os.path.islink(src):
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    else:
        os.symlink(os.readlink(src), dst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000111
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    When follow_symlinks is false and both `src` and `dst` are symlinks,
    the links themselves are used; that needs os.lchmod, and nothing is
    done where it is missing (e.g. Linux).  In every other case the
    regular os.stat/os.chmod pair is used, if os.chmod exists.

    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        if not hasattr(os, 'lchmod'):
            return
        read_stat, apply_mode = os.lstat, os.lchmod
    elif not hasattr(os, 'chmod'):
        return
    else:
        read_stat, apply_mode = os.stat, os.chmod

    source_stat = read_stat(src)
    apply_mode(dst, stat.S_IMODE(source_stat.st_mode))
132
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Overwrite existing attributes.

        If `follow_symlinks` is false, symlinks won't be followed.

        Copying is best effort: nothing is copied when the attributes of
        `src` cannot be listed because the filesystem does not support
        them, and an individual attribute is skipped when reading or
        writing it fails with EPERM, ENOTSUP or ENODATA.  Any other
        OSError is propagated.

        """

        try:
            names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as e:
            # The source filesystem has no extended attribute support;
            # there is nothing to copy.
            if e.errno not in (errno.ENOTSUP, errno.ENODATA, errno.EINVAL):
                raise
            return
        for name in names:
            try:
                value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
                os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
            except OSError as e:
                if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
                    raise
else:
    # No extended attribute API on this platform: copying is a no-op.
    def _copyxattr(*args, **kwargs):
        pass
153
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    If the optional flag `follow_symlinks` is not set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks.

    Extended attributes are copied as well, through _copyxattr().

    """
    # Stand-in for an os function that is missing or unusable here; it
    # accepts the same call shapes used below and does nothing.
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))
    if follow:
        # use the real function if it exists
        def lookup(name):
            return getattr(os, name, _nop)
    else:
        # use the real function only if it exists
        # *and* it supports follow_symlinks
        def lookup(name):
            fn = getattr(os, name, _nop)
            if fn in os.supports_follow_symlinks:
                return fn
            return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    # Timestamps are copied with nanosecond resolution.
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
        follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # The call guarded here is chmod (an earlier version of this
        # comment mentioned lchown()/fchownat()).  If we got a
        # NotImplementedError, it's because follow_symlinks=False and the
        # platform cannot change the mode of a symlink itself --
        # presumably lchmod() is unavailable and fchmodat() either is
        # unavailable or does not implement AT_SYMLINK_NOFOLLOW (it
        # reported ENOTSUP).
        # therefore we're out of options--we simply cannot chmod the
        # symlink. give up, suppress the error.
        # (which is what shutil always did in this circumstance.)
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Ignore "operation not supported" (under whichever of the two
            # errno names this platform defines); re-raise anything else.
            for err in 'EOPNOTSUPP', 'ENOTSUP':
                if hasattr(errno, err) and why.errno == getattr(errno, err):
                    break
            else:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200207
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000225
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000241
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns
253
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including subdirectories. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  The check
                    # goes through the link itself: a relative target is
                    # relative to the link's directory, not to the current
                    # working directory, so testing `linkto` would be wrong.
                    if (not os.path.exists(srcname)
                            and ignore_dangling_symlinks):
                        continue
                    # otherwise let the copy occur. copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Pass every option down so that subdirectories are copied
                # under the same rules as the top-level directory.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows.  The winerror
        # attribute only exists on Windows, hence the getattr().
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000338
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree rooted at path using path-based calls.

    Every failure is handed to onerror(func, path, exc_info), where func
    is the os function that failed.  A symbolic link passed as path is
    reported (with os.path.islink as func) and left untouched.
    """
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except OSError:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except OSError:
            # Could not stat the entry: treat it as a plain file.
            is_directory = False
        if is_directory:
            _rmtree_unsafe(child, onerror)
            continue
        try:
            os.unlink(child)
        except OSError:
            onerror(os.unlink, child, sys.exc_info())

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000371
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as file descriptor topfd.

    `path` is the name of that directory; it is only used to build the
    paths reported to onerror(func, path, exc_info).  All filesystem calls
    address entries by name relative to topfd (dir_fd=topfd).  The
    directory itself is not removed here; that is left to the caller.
    """
    names = []
    try:
        names = os.listdir(topfd)
    except OSError as err:
        # Report the directory by name rather than leaving the filename
        # that listdir() set for a file-descriptor argument.
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            # Do not follow symlinks: a link to a directory must be
            # unlinked, not descended into.
            orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
            mode = orig_st.st_mode
        except OSError:
            # Cannot stat the entry: fall through to the unlink branch.
            mode = 0
        if stat.S_ISDIR(mode):
            try:
                dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
            except OSError:
                onerror(os.open, fullname, sys.exc_info())
            else:
                try:
                    # Only descend if what was opened is still the entry
                    # that was stat()ed above.
                    if os.path.samestat(orig_st, os.fstat(dirfd)):
                        _rmtree_safe_fd(dirfd, fullname, onerror)
                        try:
                            os.rmdir(name, dir_fd=topfd)
                        except OSError:
                            onerror(os.rmdir, fullname, sys.exc_info())
                    else:
                        try:
                            # This can only happen if someone replaces
                            # a directory with a symlink after the call to
                            # stat.S_ISDIR above.
                            raise OSError("Cannot call rmtree on a symbolic "
                                          "link")
                        except OSError:
                            onerror(os.path.islink, fullname, sys.exc_info())
                finally:
                    os.close(dirfd)
        else:
            try:
                os.unlink(name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200416
# True when the os module supports everything the fd-based rmtree uses:
# dir_fd for open/stat/unlink/rmdir, a file descriptor argument for listdir,
# and follow_symlinks for stat.
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
                     os.supports_dir_fd and
                     os.listdir in os.supports_fd and
                     os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000421
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    A path that is a symbolic link is never removed; it is reported as
    an error with os.path.islink as func.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block, so a bare raise
            # re-raises the exception being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)
472
# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform: the attribute is true
# exactly when rmtree() takes its fd-based code path.
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000476
def _basename(path):
    """Return the last component of path, ignoring trailing separators.

    A basename() variant which first strips the trailing slash, if present.
    Thus we always get the last component of the path, even for directories.
    Both os.path.sep and os.path.altsep (when the platform defines one)
    count as separators.
    """
    separators = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(separators))
481
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist; an Error is raised if it does.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    An Error is also raised when asked to move a directory into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as on every other path.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then remove.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000530
def _destinsrc(src, dst):
    """Return True if dst is src itself or a path located inside src.

    Both paths are made absolute and given a trailing separator before
    the prefix comparison, so that "/a/bc" is not taken to be inside "/a/b".
    """
    def as_directory(name):
        absolute = abspath(name)
        if absolute.endswith(os.path.sep):
            return absolute
        return absolute + os.path.sep

    src_dir = as_directory(src)
    dst_dir = as_directory(dst)
    return dst_dir.startswith(src_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000539
def _get_gid(name):
    """Return the gid of the group called name.

    None is returned when name is None, when group lookups are not
    available (getgrnam is None) or when no such group exists.
    """
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    # The gid is the third field of the group database entry.
    return None if entry is None else entry[2]
551
def _get_uid(name):
    """Return the uid of the user called name.

    None is returned when name is None, when user lookups are not
    available (getpwnam is None) or when no such user exists.
    """
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    # The uid is the third field of the password database entry.
    return None if entry is None else entry[2]
563
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (when the bz2 module
    is available), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    If 'dry_run' is true, nothing is written: neither the archive nor its
    directory is created.  'logger', if given, receives info() messages.
    'verbose' is accepted but not used here.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory part; the archive
    # then goes to the current directory and there is nothing to create.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that could
        # be resolved from the requested owner/group names.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
625
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by running the external "zip"
    command through distutils' spawn().

    The command is run recursively ("-r"), and quietly ("-rq") unless
    verbose is true; dry_run is passed on to spawn().  Raises ExecError
    if spawn() reports a failure.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The file name is interpolated into the message *before* the
        # exception object is built.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
642
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  Returns the name of the output zip
    file.

    The directory part of 'base_name' is created when it does not exist.
    'verbose' is only used by the external "zip" fallback.  With a true
    'dry_run' nothing is written to disk.  'logger', when given, receives
    info() messages describing each step.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # dirname() is '' when base_name has no directory part; there is nothing
    # to create in that case and os.makedirs('') would raise.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            # The context manager closes the archive even if walking the
            # tree or writing a member fails.
            with zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED) as zf:
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)

    return zip_filename
689
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
}

# The bzip2 variant is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"))
699
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
709
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format under 'name'.

    'function' is the callable used to create archives of this format.
    'extra_args', if given, is a list or tuple of (name, value) pairs that
    will be passed to the callable as arguments.  'description' is the text
    reported for the format by get_archive_formats().

    Raises TypeError if 'function' is not callable, if 'extra_args' is not
    a list or tuple, or if one of its elements is not a 2-item list/tuple.
    """
    extra_args = [] if extra_args is None else extra_args

    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')

    for item in extra_args:
        is_pair = isinstance(item, (tuple, list)) and len(item) == 2
        if not is_pair:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
730
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _ARCHIVE_FORMATS.pop(name)
733
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.  They are passed to the archiver of
    every format except "zip".

    'dry_run' and 'logger' are forwarded to the archiver; with a true
    'dry_run' the working directory is not changed.  'verbose' is accepted
    but not used by this function.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Resolve the format before touching the working directory: raising
    # after the chdir below (and outside the try/finally) would leave the
    # process stranded in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first so it still names the same file
        # once the current directory has changed.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000787
788
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, entry[0], entry[3])
                  for name, entry in _UNPACK_FORMATS.items())
799
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of 'extensions' is already claimed by a
    registered unpack format, and TypeError if 'function' is not callable.
    'extra_args' is accepted but not inspected.
    """
    # Map every extension that is already registered to the format owning it.
    claimed = {ext: fmt_name
               for fmt_name, fmt_info in _UNPACK_FORMATS.items()
               for ext in fmt_info[0]}

    for ext in extensions:
        if ext in claimed:
            raise RegistryError('%s is already registered for "%s"'
                                % (ext, claimed[ext]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
816
817
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of file name extensions corresponding to the
    format.

    `function` is the callable that will be used to unpack archives.  It
    receives the archive to unpack; if it is unable to handle an archive,
    it needs to raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as arguments to the callable.  `description` describes
    the format and is returned by the get_unpack_formats() function.

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
839
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
843
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created as well.  Nothing is done
    when `path` has no directory part.
    """
    dirname = os.path.dirname(path)
    # A bare file name yields an empty dirname, which refers to the current
    # directory; os.makedirs('') would raise, so skip it.
    if dirname and not os.path.isdir(dirname):
        os.makedirs(dirname)
849
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: creating its path above was enough
                continue

            # regular file: copy its content to the target path
            data = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(data)
            del data
886
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`.

    Raises ReadError if tarfile cannot open `filename`.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    # The archive is closed whether or not extraction succeeds.
    with archive:
        archive.extractall(extract_dir)
899
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# The bzip2 variant is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.bz2'], _unpack_tarfile, [], "bzip2'ed tar-file"))
909
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is none.
    """
    for fmt_name, fmt_info in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in fmt_info[0]):
            return fmt_name
    return None
916
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    A ValueError is raised when `format` is given but is not registered; a
    ReadError is raised when `format` is omitted and no registered
    extension matches `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    extra_kwargs = dict(format_info[2])
    unpacker(filename, extract_dir, **extra_kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200952
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200953
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        The figures are derived from os.statvfs(): 'total' from f_blocks,
        'used' from f_blocks - f_bfree and 'free' from f_bavail, each
        multiplied by f_frsize.
        """
        fs = os.statvfs(path)
        unit = fs.f_frsize
        return _ntuple_diskusage(total=fs.f_blocks * unit,
                                 used=(fs.f_blocks - fs.f_bfree) * unit,
                                 free=fs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        'used' is computed as total - free from the values reported by
        nt._getdiskusage().
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200986
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200987
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that
    case, they are converted to their respective uid/gid.

    Raises ValueError if neither user nor group is given, and LookupError
    if a given name cannot be resolved to an id.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system user name that has to be looked up
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        # anything else is passed through as the uid
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # anything that is not an int is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001018
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None or the platform has no
            # os.get_terminal_size; ValueError: the stream is closed;
            # OSError: the descriptor is not connected to a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001061
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Given a command, mode, and a PATH string, return the path which
    conforms to the given mode on the PATH, or None if there is no such
    file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    If `cmd` contains a directory part it is checked directly and the
    search path is not consulted.

    """
    # A candidate qualifies when it exists, passes os.access() for `mode`
    # and is not a directory (on Windows directories pass the os.access
    # check).
    def _usable(candidate, wanted_mode):
        return (os.path.exists(candidate)
                and os.access(candidate, wanted_mode)
                and not os.path.isdir(candidate))

    # If we're given a path with a directory part, look it up directly
    # rather than referring to PATH directories. This includes checking
    # relative to the current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    search_dirs = (path or os.environ.get("PATH", os.defpath)).split(
        os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # If cmd already ends with one of the PATHEXT extensions (e.g.
        # "python.exe") only that exact name is tried; otherwise every
        # extension is appended in turn.
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # this directory was already searched
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None