blob: b5e7cbe3d5daa62be82046b8631d5e4e00c6d93d [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040020except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000021 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040025except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000026 getpwnam = None
27
28try:
29 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040030except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000031 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Public names of this module (what "from <module> import *" exports).
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which", "get_terminal_size",
           "SameFileError"]
           # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000042
class Error(OSError):
    """Base error for this module; a subclass of OSError."""
Guido van Rossumc6360141990-10-13 19:23:40 +000045
class SameFileError(Error):
    """Signals that the source and the destination are one and the same file."""
48
class SpecialFileError(OSError):
    """Signals an operation (such as copying) that is not supported on a
    special file, for instance a named pipe."""
52
class ExecError(OSError):
    """Signals that a command could not be executed."""
55
class ReadError(OSError):
    """Signals that an archive could not be read."""
58
class RegistryError(Exception):
    """Signals a failed operation on the archiving or unpacking
    registries."""
62
63
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc into file-like object fdst.

    Data is moved in chunks obtained from ``fsrc.read(length)``; copying
    stops at the first chunk that is empty (falsy).
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
71
Johannes Gijsbers46f14592004-08-14 13:30:02 +000072def _samefile(src, dst):
73 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000074 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000075 try:
76 return os.path.samefile(src, dst)
77 except OSError:
78 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000079
80 # All other platforms: check for same pathname.
81 return (os.path.normcase(os.path.abspath(src)) ==
82 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000083
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the data of the file src into the file dst and return dst.

    When follow_symlinks is false and src is a symbolic link, dst is
    created as a new symlink with the same target instead of receiving a
    copy of the file the link points to.

    Raises SameFileError if src and dst are the same file, and
    SpecialFileError if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            path_mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(path_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if follow_symlinks or not os.path.islink(src):
        with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
    else:
        # Re-create the link itself rather than copying its target.
        os.symlink(os.readlink(src), dst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000112
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    Symlinks are operated on directly (not followed) only when
    follow_symlinks is false and both `src` and `dst` are symlinks; in that
    case the function silently does nothing if `os.lchmod` is missing
    (e.g. Linux). It also does nothing when `os.chmod` is missing.
    """
    operate_on_links = (not follow_symlinks and os.path.islink(src)
                        and os.path.islink(dst))
    if operate_on_links:
        if not hasattr(os, 'lchmod'):
            return
        stat_func, chmod_func = os.lstat, os.lchmod
    elif hasattr(os, 'chmod'):
        stat_func, chmod_func = os.stat, os.chmod
    else:
        return

    src_mode = stat_func(src).st_mode
    chmod_func(dst, stat.S_IMODE(src_mode))
133
Larry Hastingsad5ae042012-07-14 17:55:11 -0700134if hasattr(os, 'listxattr'):
Larry Hastingsb4038062012-07-15 10:57:38 -0700135 def _copyxattr(src, dst, *, follow_symlinks=True):
Larry Hastingsad5ae042012-07-14 17:55:11 -0700136 """Copy extended filesystem attributes from `src` to `dst`.
137
138 Overwrite existing attributes.
139
Larry Hastingsb4038062012-07-15 10:57:38 -0700140 If `follow_symlinks` is false, symlinks won't be followed.
Larry Hastingsad5ae042012-07-14 17:55:11 -0700141
142 """
143
Hynek Schlawack0beab052013-02-05 08:22:44 +0100144 try:
145 names = os.listxattr(src, follow_symlinks=follow_symlinks)
146 except OSError as e:
147 if e.errno not in (errno.ENOTSUP, errno.ENODATA):
148 raise
149 return
150 for name in names:
Larry Hastingsad5ae042012-07-14 17:55:11 -0700151 try:
Larry Hastingsb4038062012-07-15 10:57:38 -0700152 value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
153 os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
Larry Hastingsad5ae042012-07-14 17:55:11 -0700154 except OSError as e:
155 if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
156 raise
157else:
158 def _copyxattr(*args, **kwargs):
159 pass
160
def copystat(src, dst, *, follow_symlinks=True):
    """Copy stat info (mode bits, atime, mtime, flags) and extended
    attributes from src to dst.

    Symlinks are operated on directly (not followed) only when
    `follow_symlinks` is false and both `src` and `dst` are symlinks.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))

    def lookup(name):
        # When following symlinks any existing os function will do; when not
        # following, the function must also support follow_symlinks.
        # Otherwise fall back to the do-nothing stand-in.
        candidate = getattr(os, name, _nop)
        if follow or candidate in os.supports_follow_symlinks:
            return candidate
        return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # chmod was asked not to follow the symlink (follow_symlinks=False)
        # but the platform cannot change the mode of a symlink itself.
        # There is nothing else to try, so give up and suppress the error.
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Tolerate "operation not supported"; re-raise everything else.
            tolerated = [getattr(errno, err_name)
                         for err_name in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, err_name)]
            if why.errno not in tolerated:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200214
def copy(src, dst, *, follow_symlinks=True):
    """Copy file data and mode bits ("cp src dst") and return the path of
    the file that was written.

    If dst is a directory, the file is created inside it under the
    basename of src.

    With follow_symlinks false, symlinks are not followed, which resembles
    GNU's "cp -P src dst".

    A SameFileError is raised when source and destination are the same
    file.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000232
def copy2(src, dst, *, follow_symlinks=True):
    """Copy file data and all stat info ("cp -p src dst") and return the
    path of the file that was written.

    If dst is a directory, the file is created inside it under the
    basename of src.

    With follow_symlinks false, symlinks are not followed, which resembles
    GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000248
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
260
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied (a link to a directory is copied as a directory
    tree).  If the file pointed by the symlink doesn't exist, an
    exception will be added in the list of errors raised in an Error
    exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    linkto = os.readlink(srcname)
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.  Test the
                    # link path itself (exists() follows it) so a relative
                    # target is resolved against the link's directory and not
                    # against the current working directory.
                    if not os.path.exists(srcname) and ignore_dangling_symlinks:
                        continue
                    if os.path.isdir(srcname):
                        # A link to a directory: copy the tree it points to;
                        # copy_function only handles files.
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        # otherwise let the copy occur. copy2 will raise an
                        # error for a dangling link
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Pass every option down, including ignore_dangling_symlinks,
                # so subdirectories are treated like the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows; such failures carry
        # a winerror and are ignored.  The attribute does not exist on other
        # platforms, hence getattr().
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000345
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200346# version vulnerable to race conditions
347def _rmtree_unsafe(path, onerror):
Christian Heimes9bd667a2008-01-20 15:14:11 +0000348 try:
349 if os.path.islink(path):
350 # symlinks to directories are forbidden, see bug #1669
351 raise OSError("Cannot call rmtree on a symbolic link")
352 except OSError:
353 onerror(os.path.islink, path, sys.exc_info())
354 # can't continue even if onerror hook returns
355 return
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000356 names = []
357 try:
358 names = os.listdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200359 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000360 onerror(os.listdir, path, sys.exc_info())
361 for name in names:
362 fullname = os.path.join(path, name)
363 try:
364 mode = os.lstat(fullname).st_mode
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200365 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000366 mode = 0
367 if stat.S_ISDIR(mode):
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200368 _rmtree_unsafe(fullname, onerror)
Barry Warsaw234d9a92003-01-24 17:36:15 +0000369 else:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000370 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200371 os.unlink(fullname)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200372 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200373 onerror(os.unlink, fullname, sys.exc_info())
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000374 try:
375 os.rmdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200376 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000377 onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000378
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Delete the contents of the directory open as file descriptor `topfd`.

    `path` is the textual path of that directory; it is only used to build
    the paths reported to `onerror(func, path, exc_info)`.  All filesystem
    calls address entries relative to `topfd` (dir_fd=...).  The directory
    itself is not removed here: each subdirectory is removed by this
    function after recursing into it, but `topfd`'s own directory is left
    to the caller.
    """
    names = []
    try:
        names = os.listdir(topfd)
    except OSError as err:
        # listdir() was given an fd; report the textual path instead.
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            # lstat-style call: do not follow a symlink entry.
            orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
            mode = orig_st.st_mode
        except OSError:
            # mode 0 is not a directory, so the entry falls through to unlink.
            mode = 0
        if stat.S_ISDIR(mode):
            try:
                dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
            except OSError:
                onerror(os.open, fullname, sys.exc_info())
            else:
                try:
                    # Recurse only if what was opened is the very entry that
                    # was stat'ed above.
                    if os.path.samestat(orig_st, os.fstat(dirfd)):
                        _rmtree_safe_fd(dirfd, fullname, onerror)
                        try:
                            os.rmdir(name, dir_fd=topfd)
                        except OSError:
                            onerror(os.rmdir, fullname, sys.exc_info())
                    else:
                        try:
                            # This can only happen if someone replaces
                            # a directory with a symlink after the call to
                            # stat.S_ISDIR above.
                            raise OSError("Cannot call rmtree on a symbolic "
                                          "link")
                        except OSError:
                            onerror(os.path.islink, fullname, sys.exc_info())
                finally:
                    os.close(dirfd)
        else:
            try:
                os.unlink(name, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200423
# True when the platform offers everything the fd-based rmtree needs:
# dir_fd support in open/stat/unlink/rmdir, listdir() on a file descriptor,
# and stat() with follow_symlinks.
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
                     os.supports_dir_fd and
                     os.listdir in os.supports_fd and
                     os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000428
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except block, so a bare raise
            # re-raises the error being handled.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open, not
            # os.lstat) so the handler can tell the two failures apart.
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)
479
# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform: the attribute is true
# exactly when rmtree() takes its fd-based code path.
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000483
Christian Heimesada8c3b2008-03-18 18:26:33 +0000484def _basename(path):
485 # A basename() variant which first strips the trailing slash, if present.
486 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200487 sep = os.path.sep + (os.path.altsep or '')
488 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000489
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    Raises Error if the target inside a destination directory already
    exists, or if a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as documented, instead of
            # falling out with None.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then remove.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000538
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000539def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000540 src = abspath(src)
541 dst = abspath(dst)
542 if not src.endswith(os.path.sep):
543 src += os.path.sep
544 if not dst.endswith(os.path.sep):
545 dst += os.path.sep
546 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000547
548def _get_gid(name):
549 """Returns a gid, given a group name."""
550 if getgrnam is None or name is None:
551 return None
552 try:
553 result = getgrnam(name)
554 except KeyError:
555 result = None
556 if result is not None:
557 return result[2]
558 return None
559
560def _get_uid(name):
561 """Returns an uid, given a user name."""
562 if getpwnam is None or name is None:
563 return None
564 try:
565 result = getpwnam(name)
566 except KeyError:
567 result = None
568 if result is not None:
569 return result[2]
570 return None
571
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.  "bzip2" is
    only accepted when the bz2 module could be imported; any other value
    raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'logger', if given, receives info() messages.  With a true 'dry_run'
    neither the archive nor its directory is created.  'verbose' is
    accepted but not used here.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # reject compression formats we do not know or cannot support
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty archive_dir means "current directory": nothing to create,
    # and os.makedirs('') would raise.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tar.add() filter: stamp the requested owner/group on every member.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
633
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning an external "zip".

    'verbose' selects the "-r" option instead of the quiet "-rq"; 'dry_run'
    is forwarded to distutils' spawn().

    Raises ExecError if spawn() reports a DistutilsExecError.
    """
    # XXX see if we want to keep an external call here
    zipoptions = "-r" if verbose else "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is handed to ExecError;
        # applying '%' to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
650
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if it can be imported) or, failing that, the
    external "zip" utility through _call_external_zip().  'verbose' is only
    forwarded to that external fallback.  When 'dry_run' is true nothing is
    written, but the actions are still reported to 'logger' (if given).

    Returns the name of the output zip file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # dirname() is '' when base_name has no directory part; in that case the
    # archive goes to the current directory and there is nothing to create
    # (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
        return zip_filename

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if not dry_run:
        with zipfile.ZipFile(zip_filename, "w",
                             compression=zipfile.ZIP_DEFLATED) as zf:
            for dirpath, dirnames, filenames in os.walk(base_dir):
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    # only regular files are stored in the archive
                    if os.path.isfile(path):
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)

    return zip_filename
695
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple; extra_args is a list of
# (keyword, value) pairs passed to the function by make_archive().
_ARCHIVE_FORMATS = dict(
    gztar=(_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    tar=(_make_tarball, [('compress', None)], "uncompressed tar file"),
    zip=(_make_zipfile, [], "ZIP file"),
)

# The bzip2 flavour is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"))
705
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a (name, description) tuple taken
    from the archive format registry.
    """
    return sorted((fmt, entry[2]) for fmt, entry in _ARCHIVE_FORMATS.items())
715
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap the object in a 1-tuple: a bare tuple on the right of '%'
        # would be unpacked as the format arguments and garble the message.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
736
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
739
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any other registered format.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to every archiver except the one
    registered as "zip".  'verbose' is accepted but not used here.

    Raises ValueError if 'format' is not a registered archive format.
    """
    # Resolve the format and build the archiver's keyword arguments before
    # touching the working directory, so that a bad format cannot leave the
    # process stranded inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # make base_name absolute while the original cwd is still current
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000793
794
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a
    (name, extensions, description) tuple.
    """
    return sorted((fmt, entry[0], entry[3])
                  for fmt, entry in _UNPACK_FORMATS.items())
805
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of 'extensions' already belongs to a
    registered unpack format, and TypeError if 'function' is not callable.
    'extra_args' is accepted but not inspected.
    """
    # map every extension already in the registry to the format owning it
    claimed = {ext: fmt
               for fmt, entry in _UNPACK_FORMATS.items()
               for ext in entry[0]}

    for candidate in extensions:
        if candidate in claimed:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (candidate, claimed[candidate]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
822
823
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of file name extensions handled by the format.

    `function` is the callable used to unpack archives; it is called with
    the archive to unpack and must raise ReadError when it cannot handle it.

    `extra_args`, if given, is a sequence of (name, value) tuples passed as
    keyword arguments to the callable.  `description` is the text reported
    by get_unpack_formats().

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
845
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
849
def _ensure_directory(path):
    """Create the parent directory of `path` unless it is already a directory."""
    parent = os.path.dirname(path)
    if os.path.isdir(parent):
        return
    os.makedirs(parent)
855
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: nothing to write
                continue

            payload = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(payload)
            del payload
892
def _unpack_tarfile(filename, extract_dir):
    """Unpack the tar/tar.gz/tar.bz2 archive `filename` into `extract_dir`.

    Raises ReadError if tarfile cannot open `filename`.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    # NOTE(review): member names are not inspected here before extraction;
    # confirm callers only pass trusted archives.
    with archive:
        archive.extractall(extract_dir)
905
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = dict(
    gztar=(['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    tar=(['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    zip=(['.zip'], _unpack_zipfile, [], "ZIP file"),
)

# The bzip2 flavour is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.bz2'], _unpack_tarfile, [], "bzip2'ed tar-file"))
915
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None if there is none."""
    for fmt, entry in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in entry[0]):
            return fmt
    return None
922
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the name of a registered unpack format, such as "zip",
    "tar" or "gztar".  If not provided, the format is looked up from the
    extension of `filename` among the registered unpackers.

    Raises ValueError if an explicit `format` is not registered, and
    ReadError if no registered extension matches `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    extra_kwargs = dict(format_info[2])
    unpacker(filename, extract_dir, **extra_kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200958
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200959
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', computed from os.statvfs() block counts multiplied by the
        fragment size.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free'; 'used' is derived as total minus free from the values
        reported by nt._getdiskusage().
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200992
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200993
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    user may be a uid or a user name (a str, resolved with _get_uid);
    group may be a gid (an int) or anything else, which is resolved with
    _get_gid.  An argument left as None keeps the current value.

    Raises ValueError if both user and group are None, and LookupError if
    a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system username; anything else is used as the uid
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001024
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, because sys.__stdout__ is missing
    or closed, or because we are not connected to a terminal, the value
    given in fallback parameter is used. Fallback defaults to (80, 24)
    which is the default size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, NameError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None or os.get_terminal_size
            # is unavailable; ValueError: the stream is closed; OSError: the
            # descriptor is not a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001067
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate `cmd` on a search path and return the matching file path.

    Returns the first path that satisfies `mode`, or None if there is no
    such file.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    """
    # A candidate is usable when it exists, passes os.access() for the
    # requested mode, and is not a directory (on Windows directories pass
    # the os.access check).
    def _usable(candidate, wanted):
        return (os.path.exists(candidate) and os.access(candidate, wanted)
                and not os.path.isdir(candidate))

    # A command with a directory part is looked up directly rather than on
    # the search path.  This includes names relative to the current
    # directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # If cmd already ends with one of the PATHEXT extensions (e.g.
        # "python.exe") only that exact name is tried; otherwise every
        # extension is appended in turn.
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # each directory is searched only once
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None