blob: 9a6a0406bd9aa26fe5c6a3e628ccabd5ac14d545 [file] [log] [blame]
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040020except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000021 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040025except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000026 getpwnam = None
27
28try:
29 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040030except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000031 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Public API of this module.  SameFileError is listed because copyfile()
# raises it and callers need to be able to import it by name.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which", "SameFileError"]
           # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(OSError):
    """Generic error raised by the operations in this module.

    copytree() raises it with a list of (src, dst, reason) tuples as its
    single argument; move() raises it with a plain message.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SameFileError(Error):
    """Raised by copyfile() when source and destination are the same file."""
47
class SpecialFileError(OSError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file, such as a named pipe, that does not support it."""
51
class ExecError(OSError):
    """Raised when an external command could not be executed."""
54
class ReadError(OSError):
    """Raised when an archive cannot be read."""
57
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
61
62
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` (as passed to
    fsrc.read()) until a read returns an empty/falsy result.
    """
    read_chunk = fsrc.read
    write_chunk = fdst.write
    chunk = read_chunk(length)
    while chunk:
        write_chunk(chunk)
        chunk = read_chunk(length)
70
def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() where the platform provides it (an OSError
    from it, e.g. a missing path, counts as "not the same").  Otherwise
    the normalized absolute path names are compared.
    """
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # All other platforms: check for same pathname.
        src_key = os.path.normcase(os.path.abspath(src))
        dst_key = os.path.normcase(os.path.abspath(dst))
        return src_key == dst_key

    # Macintosh, Unix.
    try:
        return samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000082
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the data of src into dst and return dst.

    If follow_symlinks is false and src is a symbolic link, a new symlink
    with the same target is created at dst instead of copying the file the
    link points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    # XXX What about other special files? (sockets, devices...)
    for path in (src, dst):
        try:
            path_stat = os.stat(path)
        except OSError:
            # File most likely does not exist
            continue
        if stat.S_ISFIFO(path_stat.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if not follow_symlinks and os.path.islink(src):
        # Recreate the link itself rather than copying what it points to.
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000111
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits from src to dst.

    If follow_symlinks is false, the links themselves are used if and only
    if both `src` and `dst` are symlinks; when `os.lchmod` isn't available
    in that case (e.g. Linux) nothing is done.  Nothing is done either when
    the platform has no `os.chmod`.

    """
    if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
        if not hasattr(os, 'lchmod'):
            return
        read_stat, apply_mode = os.lstat, os.lchmod
    elif hasattr(os, 'chmod'):
        read_stat, apply_mode = os.stat, os.chmod
    else:
        return

    apply_mode(dst, stat.S_IMODE(read_stat(src).st_mode))
132
if not hasattr(os, 'listxattr'):
    def _copyxattr(*args, **kwargs):
        """Do nothing: this platform has no extended-attribute API."""
        pass
else:
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Attributes already present on `dst` are overwritten.

        If `follow_symlinks` is false, symlinks won't be followed.

        ENOTSUP/ENODATA while listing, and EPERM/ENOTSUP/ENODATA while
        copying a single attribute, are silently ignored; any other
        OSError propagates.

        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise

        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                if exc.errno not in (errno.EPERM, errno.ENOTSUP,
                                     errno.ENODATA):
                    raise
159
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied as well, via _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        # Stand-in for an os function that is missing or unusable here.
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))

    def lookup(name):
        # When following links, any os function that exists will do.
        # Otherwise it must also support follow_symlinks=False; if it
        # doesn't, the operation is skipped by returning the no-op.
        func = getattr(os, name, _nop)
        if follow or func in os.supports_follow_symlinks:
            return func
        return _nop

    src_stat = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(src_stat.st_mode)
    lookup("utime")(dst, ns=(src_stat.st_atime_ns, src_stat.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # Raised when follow_symlinks=False was requested but the platform
        # has no way to act on the symlink itself.  We're out of options,
        # so give up and suppress the error (which is what shutil always
        # did in this circumstance).
        # NOTE(review): the original note named lchown()/fchownat(), but
        # the call guarded here is chmod -- presumably lchmod()/fchmodat()
        # were meant; confirm.
        pass

    if hasattr(src_stat, 'st_flags'):
        try:
            lookup("chflags")(dst, src_stat.st_flags, follow_symlinks=follow)
        except OSError as why:
            # "Operation not supported" is tolerated; anything else is not.
            tolerated = tuple(getattr(errno, code)
                              for code in ('EOPNOTSUPP', 'ENOTSUP')
                              if hasattr(errno, code))
            if why.errno not in tolerated:
                raise

    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200213
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000231
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000247
def ignore_patterns(*patterns):
    """Build a callable usable as the copytree() `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns
259
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons
    (tuples of source name, destination name and message).

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files (or directories) pointed to
    by symbolic links are copied. If the file pointed by the symlink
    doesn't exist, an exception will be added in the list of errors raised
    in an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception; the flag applies to the whole tree,
    not only to the top-level directory. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(os.readlink(srcname), dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  Test the
                    # link itself: exists() follows it relative to the
                    # link's own directory, whereas testing the raw target
                    # would resolve relative targets against the cwd.
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    if os.path.isdir(srcname):
                        # A link to a directory: copy the directory contents.
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        # otherwise let the copy occur; copy2 will raise an
                        # error for a dangling link.
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows; only report the
        # failure when it is not a Windows error.  OSError has no
        # `winerror` attribute on other platforms, hence getattr().
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000344
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree at `path` using path-based os calls.

    Every failure is reported as onerror(func, path, sys.exc_info()) from
    inside the active exception handler.  If onerror returns, removal
    continues with the remaining entries, except for a symlink at `path`,
    which aborts the call.
    """
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except OSError:
        onerror(os.listdir, path, sys.exc_info())
        entries = []

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_dir = stat.S_ISDIR(os.lstat(child).st_mode)
        except OSError:
            # Cannot stat it: treat it as a non-directory and try to unlink.
            is_dir = False
        if is_dir:
            _rmtree_unsafe(child, onerror)
            continue
        try:
            os.unlink(child)
        except OSError:
            onerror(os.unlink, child, sys.exc_info())

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000377
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as descriptor `topfd`.

    `path` is only used to build the names reported to `onerror`; all
    filesystem calls are made relative to `topfd`.  The directory itself
    is not removed and `topfd` is not closed here.  Failures are reported
    as onerror(func, fullname, sys.exc_info()) from inside the active
    exception handler.
    """
    try:
        entries = os.listdir(topfd)
    except OSError as err:
        # Report the path rather than the descriptor number.
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())
        entries = []

    for entry in entries:
        entry_path = os.path.join(path, entry)
        try:
            entry_stat = os.stat(entry, dir_fd=topfd, follow_symlinks=False)
        except OSError:
            is_dir = False
        else:
            is_dir = stat.S_ISDIR(entry_stat.st_mode)

        if not is_dir:
            try:
                os.unlink(entry, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, entry_path, sys.exc_info())
            continue

        try:
            subfd = os.open(entry, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, entry_path, sys.exc_info())
            continue

        try:
            if os.path.samestat(entry_stat, os.fstat(subfd)):
                # What we opened is what we stat'ed: safe to descend.
                _rmtree_safe_fd(subfd, entry_path, onerror)
                try:
                    os.rmdir(entry, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, entry_path, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic "
                                  "link")
                except OSError:
                    onerror(os.path.islink, entry_path, sys.exc_info())
        finally:
            os.close(subfd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200422
# True when the platform offers every fd-based primitive that
# _rmtree_safe_fd() relies on: dir_fd support for open/stat/unlink/rmdir,
# listdir() on a descriptor, and stat() with follow_symlinks=False.
_use_fd_functions = (
    {os.open, os.stat, os.unlink, os.rmdir}.issubset(os.supports_dir_fd)
    and os.listdir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000427
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block, so a bare raise
            # re-raises the error being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open).
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000482
def _basename(path):
    """Return the last component of `path`, ignoring trailing separators.

    Unlike os.path.basename(), trailing separators are stripped first, so
    the last component is returned even for directory paths such as
    "a/b/".  Both os.path.sep and os.path.altsep (when the platform
    defines one) count as separators.
    """
    separators = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(separators))
487
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    Raises Error if the computed destination already exists, or if a
    directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as documented.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then remove.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000536
def _destinsrc(src, dst):
    """Return True if the absolute path of `dst` is `src` or lies below it.

    Both paths get a trailing separator before the prefix comparison so
    that "/a/bc" is not mistaken for something inside "/a/b".
    """
    sep = os.path.sep
    src_dir = abspath(src)
    dst_dir = abspath(dst)
    if not src_dir.endswith(sep):
        src_dir = src_dir + sep
    if not dst_dir.endswith(sep):
        dst_dir = dst_dir + sep
    return dst_dir.startswith(src_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000545
def _get_gid(name):
    """Return the gid for group `name`, or None.

    None is returned when `name` is None, when getgrnam is unavailable,
    or when the group is unknown.
    """
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    # Index 2 of the group entry holds the numeric id.
    return entry[2] if entry is not None else None
557
def _get_uid(name):
    """Return the uid for user `name`, or None.

    None is returned when `name` is None, when getpwnam is unavailable,
    or when the user is unknown.
    """
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    # Index 2 of the password entry holds the numeric id.
    return entry[2] if entry is not None else None
569
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module is available), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").  Its parent
    directory is created if it does not exist yet.

    If 'dry_run' is true, nothing is written to disk.  'logger', if given,
    receives informational messages.

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty archive_dir means "the current directory": there is nothing
    # to create, and os.makedirs('') would fail.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
631
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' using the external "zip" command.

    'verbose' selects the "-r" option instead of the quiet "-rq" one, and
    'dry_run' is forwarded to distutils' spawn().

    Raises ExecError if spawning the command fails.
    """
    # XXX see if we want to keep an external call here
    zipoptions = "-r" if verbose else "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The filename has to be interpolated into the message string;
        # applying '%' to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
648
649def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
650 """Create a zip file from all the files under 'base_dir'.
651
Éric Araujo4433a5f2010-12-15 20:26:30 +0000652 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000653 "zipfile" Python module (if available) or the InfoZIP "zip" utility
654 (if installed and found on the default search path). If neither tool is
655 available, raises ExecError. Returns the name of the output zip
656 file.
657 """
658 zip_filename = base_name + ".zip"
659 archive_dir = os.path.dirname(base_name)
660
661 if not os.path.exists(archive_dir):
662 if logger is not None:
663 logger.info("creating %s", archive_dir)
664 if not dry_run:
665 os.makedirs(archive_dir)
666
667 # If zipfile module is not available, try spawning an external 'zip'
668 # command.
669 try:
670 import zipfile
Brett Cannoncd171c82013-07-04 17:43:24 -0400671 except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +0000672 zipfile = None
673
674 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000675 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000676 else:
677 if logger is not None:
678 logger.info("creating '%s' and adding '%s' to it",
679 zip_filename, base_dir)
680
681 if not dry_run:
Benjamin Peterson091d3862014-02-02 15:30:22 -0500682 with zipfile.ZipFile(zip_filename, "w",
683 compression=zipfile.ZIP_DEFLATED) as zf:
684 for dirpath, dirnames, filenames in os.walk(base_dir):
685 for name in filenames:
686 path = os.path.normpath(os.path.join(dirpath, name))
687 if os.path.isfile(path):
688 zf.write(path, path)
689 if logger is not None:
690 logger.info("adding '%s'", path)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000691
692 return zip_filename
693
# Registry of archive creators:
#   format name -> (function, [(extra_arg_name, value), ...], description)
_ARCHIVE_FORMATS = dict(
    gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    tar=(_make_tarball, [('compress', None)], "uncompressed tar file"),
    zip=(_make_zipfile, [], "ZIP file"),
)

# The bzip2 flavour is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"))
703
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Every item of the list is a (name, description) tuple.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
713
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-element tuple or
    list.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a one-tuple so that a tuple passed by mistake is reported
        # with this message instead of a string-formatting error.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
734
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _ARCHIVE_FORMATS.pop(name)
737
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'dry_run' and 'logger' are passed on to the archiving function;
    'verbose' is accepted but not used here.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Validate the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    # Every format except zip is handed the ownership arguments.
    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make the target absolute first: it must not depend on the
        # directory change below.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000791
792
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Every item of the list is a (name, extensions, description) tuple.
    """
    return sorted((name, entry[0], entry[3])
                  for name, entry in _UNPACK_FORMATS.items())
803
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # Map each extension already in use to the format that owns it.
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension not in owners:
            continue
        msg = '%s is already registered for "%s"'
        raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
820
821
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register a new unpack format under `name`.

    `extensions` is the list of file extensions handled by the format.

    `function` is the callable used to unpack archives. It receives the
    archive to unpack and must raise a ReadError exception when it is
    unable to handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples that
    will be passed as arguments to the callable. `description` describes
    the format and is returned by the get_unpack_formats() function.
    """
    extra_args = [] if extra_args is None else extra_args
    # Rejects extensions that are already taken and non-callable functions.
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
843
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
847
848def _ensure_directory(path):
849 """Ensure that the parent directory of `path` exists"""
850 dirname = os.path.dirname(path)
851 if not os.path.isdir(dirname):
852 os.makedirs(dirname)
853
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # Members with an absolute path or with ".." anywhere in their
            # name are silently skipped.
            if member_name.startswith('/') or '..' in member_name:
                continue

            destination = os.path.join(extract_dir, *member_name.split('/'))
            if not destination:
                continue

            _ensure_directory(destination)
            if member_name.endswith('/'):
                # directory entry: creating it above is all there is to do
                continue

            # regular file: the whole member is read into memory
            payload = archive.read(member_name)
            with open(destination, 'wb') as output:
                output.write(payload)
            del payload
890
891def _unpack_tarfile(filename, extract_dir):
892 """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
893 """
894 try:
895 tarobj = tarfile.open(filename)
896 except tarfile.TarError:
897 raise ReadError(
898 "%s is not a compressed or uncompressed tar file" % filename)
899 try:
900 tarobj.extractall(extract_dir)
901 finally:
902 tarobj.close()
903
# Registry of unpackers:
#   format name -> (extensions, function, extra_args, description)
_UNPACK_FORMATS = dict(
    gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    tar=(['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    zip=(['.zip'], _unpack_zipfile, [], "ZIP file"),
)

# The bzip2 flavour is only offered when the bz2 module could be imported.
# NOTE(review): the '.bz2' extension also matches files that are not
# '.tar.bz2' archives -- confirm this is intended.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.bz2'], _unpack_tarfile, [], "bzip2'ed tar-file"))
913
def _find_unpack_format(filename):
    """Return the name of the first registered format with an extension
    that `filename` ends with, or None if there is no such format."""
    for format_name, entry in _UNPACK_FORMATS.items():
        if any(filename.endswith(suffix) for suffix in entry[0]):
            return format_name
    return None
920
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, the unpacker is chosen from
    the extension of `filename`.

    Raises ValueError if `format` is given but not registered, and
    ReadError if `format` is omitted and no registered unpacker matches
    the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # look through the extensions supported by the registered unpackers
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        entry = _UNPACK_FORMATS[format]
    else:
        try:
            entry = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = entry[1]
    # the registered (name, value) pairs become keyword arguments
    unpacker(filename, extract_dir, **dict(entry[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200956
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200957
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', computed from os.statvfs() block counts multiplied by the
        fragment size (f_frsize).
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free'; 'used' is derived as total - free from the two values
        returned by nt._getdiskusage().
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200990
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200991
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    user may be a uid or a user name (str); group may be a gid (int) or a
    group name. Names are converted to their respective uid/gid.

    Raises ValueError if neither user nor group is given, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is the system username
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        # anything else is passed through unchanged as the uid
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # any non-int value is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001022
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, because sys.__stdout__ is None
    or closed, or because we are not connected to a terminal, the value
    given in fallback parameter is used. Fallback defaults to (80, 24)
    which is the default size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, NameError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None (or the query function
            # is missing); ValueError: the stream is closed; OSError: not a
            # terminal or querying is unsupported.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001065
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate `cmd` on a search path.

    Returns the path of the first file matching `cmd` that is accessible
    with `mode`, or None if there is no such file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    """
    def _usable(candidate):
        # The file must exist, be accessible with the requested mode and
        # not be a directory (on Windows directories pass os.access).
        if not os.path.exists(candidate):
            return False
        if not os.access(candidate, mode):
            return False
        return not os.path.isdir(candidate)

    # A command with a directory part is checked directly and never looked
    # up on the search path; this covers relative forms such as ./script.
    if os.path.dirname(cmd):
        return cmd if _usable(cmd) else None

    search_path = os.environ.get("PATH", os.defpath) if path is None else path
    if not search_path:
        return None
    directories = search_path.split(os.pathsep)

    if sys.platform != "win32":
        # No PATHEXT-like list of executable suffixes here: use cmd as-is.
        candidates = [cmd]
    else:
        # The current directory takes precedence on Windows.
        if os.curdir not in directories:
            directories.insert(0, os.curdir)

        # PATHEXT lists the suffixes to try. When cmd already ends with
        # one of them (e.g. "python.exe") only that exact name is tested.
        suffixes = os.environ.get("PATHEXT", "").split(os.pathsep)
        lowered = cmd.lower()
        if any(lowered.endswith(suffix.lower()) for suffix in suffixes):
            candidates = [cmd]
        else:
            candidates = [cmd + suffix for suffix in suffixes]

    visited = set()
    for directory in directories:
        key = os.path.normcase(directory)
        if key in visited:
            # each directory is searched only once
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name):
                return full_name
    return None