blob: c2c66de51314e0d2af59112b496827cfe9b95654 [file] [log] [blame]
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000013import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000014
15try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000016 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010017 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000018 _BZ2_SUPPORTED = True
Brett Cannoncd171c82013-07-04 17:43:24 -040019except ImportError:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000020 _BZ2_SUPPORTED = False
21
22try:
Serhiy Storchaka11213772014-08-06 18:50:19 +030023 import lzma
24 del lzma
25 _LZMA_SUPPORTED = True
26except ImportError:
27 _LZMA_SUPPORTED = False
28
29try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000030 from pwd import getpwnam
Brett Cannoncd171c82013-07-04 17:43:24 -040031except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000032 getpwnam = None
33
34try:
35 from grp import getgrnam
Brett Cannoncd171c82013-07-04 17:43:24 -040036except ImportError:
Tarek Ziadé396fad72010-02-23 05:30:31 +000037 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000038
Tarek Ziadéc3399782010-02-23 05:39:18 +000039__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
40 "copytree", "move", "rmtree", "Error", "SpecialFileError",
41 "ExecError", "make_archive", "get_archive_formats",
Tarek Ziadé6ac91722010-04-28 17:51:36 +000042 "register_archive_format", "unregister_archive_format",
43 "get_unpack_formats", "register_unpack_format",
Éric Araujoc5efe652011-08-21 14:30:00 +020044 "unregister_unpack_format", "unpack_archive",
Brian Curtinc57a3452012-06-22 16:00:30 -050045 "ignore_patterns", "chown", "which"]
Éric Araujoe4d5b8e2011-08-08 16:51:11 +020046 # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000047
class Error(OSError):
    """Generic failure raised by the copy and move helpers in this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000050
class SameFileError(Error):
    """Raised when the source and the destination are one and the same file."""
53
class SpecialFileError(OSError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
57
class ExecError(OSError):
    """Raised when a command could not be executed."""
60
class ReadError(OSError):
    """Raised when an archive cannot be read."""
63
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
67
68
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    Data is transferred in pieces obtained from fsrc.read(length); copying
    stops as soon as a read returns an empty (falsy) result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
76
Johannes Gijsbers46f14592004-08-14 13:30:02 +000077def _samefile(src, dst):
78 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000079 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000080 try:
81 return os.path.samefile(src, dst)
82 except OSError:
83 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000084
85 # All other platforms: check for same pathname.
86 return (os.path.normcase(os.path.abspath(src)) ==
87 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000088
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy the data of src to dst and return dst.

    If follow_symlinks is false and src is a symbolic link, dst is created
    as a new symlink with the same target instead of copying the file the
    link points to.

    Raises SameFileError when src and dst are the same file, and
    SpecialFileError when either path is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            path_mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(path_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if not follow_symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000117
def copymode(src, dst, *, follow_symlinks=True):
    """Copy the permission bits of src onto dst.

    If follow_symlinks is false, symlinks aren't followed if and only
    if both `src` and `dst` are symlinks.  In that case `os.lchmod` is
    required; where it isn't available (e.g. Linux) this function does
    nothing.  Likewise nothing happens when `os.chmod` is missing.

    """
    act_on_links = (not follow_symlinks
                    and os.path.islink(src) and os.path.islink(dst))
    if act_on_links:
        if not hasattr(os, 'lchmod'):
            return
        read_stat, apply_mode = os.lstat, os.lchmod
    else:
        if not hasattr(os, 'chmod'):
            return
        read_stat, apply_mode = os.stat, os.chmod

    src_stat = read_stat(src)
    apply_mode(dst, stat.S_IMODE(src_stat.st_mode))
138
Larry Hastingsad5ae042012-07-14 17:55:11 -0700139if hasattr(os, 'listxattr'):
Larry Hastingsb4038062012-07-15 10:57:38 -0700140 def _copyxattr(src, dst, *, follow_symlinks=True):
Larry Hastingsad5ae042012-07-14 17:55:11 -0700141 """Copy extended filesystem attributes from `src` to `dst`.
142
143 Overwrite existing attributes.
144
Larry Hastingsb4038062012-07-15 10:57:38 -0700145 If `follow_symlinks` is false, symlinks won't be followed.
Larry Hastingsad5ae042012-07-14 17:55:11 -0700146
147 """
148
Hynek Schlawack0beab052013-02-05 08:22:44 +0100149 try:
150 names = os.listxattr(src, follow_symlinks=follow_symlinks)
151 except OSError as e:
152 if e.errno not in (errno.ENOTSUP, errno.ENODATA):
153 raise
154 return
155 for name in names:
Larry Hastingsad5ae042012-07-14 17:55:11 -0700156 try:
Larry Hastingsb4038062012-07-15 10:57:38 -0700157 value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
158 os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
Larry Hastingsad5ae042012-07-14 17:55:11 -0700159 except OSError as e:
160 if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
161 raise
162else:
163 def _copyxattr(*args, **kwargs):
164 pass
165
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied too, through _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        # Stand-in used when an os function is missing or unusable here.
        pass

    # Follow symlinks unless the caller asked not to *and* both paths
    # are symlinks.
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))

    def lookup(name):
        # When following symlinks, any os function that exists will do.
        # Otherwise the function must also support follow_symlinks;
        # if it does not, fall back to the no-op.
        candidate = getattr(os, name, _nop)
        if follow or candidate in os.supports_follow_symlinks:
            return candidate
        return _nop

    st = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(st.st_mode)
    lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # follow_symlinks=False was requested but the platform cannot
        # change the mode of a symlink itself (presumably no lchmod() and
        # no fchmodat() honouring AT_SYMLINK_NOFOLLOW).  Nothing more can
        # be done, so the error is suppressed.
        pass
    if hasattr(st, 'st_flags'):
        try:
            lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Only "operation not supported" style failures are ignored.
            unsupported = [getattr(errno, code)
                           for code in ('EOPNOTSUPP', 'ENOTSUP')
                           if hasattr(errno, code)]
            if why.errno not in unsupported:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200219
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is placed
    inside it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000237
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is placed
    inside it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000253
def ignore_patterns(*patterns):
    """Build a callable suitable for copytree()'s ignore parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
265
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return dst.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including subdirectories. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.  The
                    # link path itself is tested (exists() follows it) so
                    # that a relative target is resolved against the link's
                    # directory, not the current working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    if os.path.isdir(srcname):
                        # The link points at a directory: copy its contents
                        # as a tree rather than handing a directory to the
                        # file copy function.
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        # Let the copy occur; a still-dangling link makes
                        # copy_function raise, which is recorded below.
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            # NOTE(review): assumes args[0] is the error list built by a
            # nested copytree call -- confirm for other Error subclasses.
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows; such failures
        # carry a winerror value and are ignored.  OSError has no
        # `winerror` attribute on other platforms, hence getattr().
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000350
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200351# version vulnerable to race conditions
352def _rmtree_unsafe(path, onerror):
Christian Heimes9bd667a2008-01-20 15:14:11 +0000353 try:
354 if os.path.islink(path):
355 # symlinks to directories are forbidden, see bug #1669
356 raise OSError("Cannot call rmtree on a symbolic link")
357 except OSError:
358 onerror(os.path.islink, path, sys.exc_info())
359 # can't continue even if onerror hook returns
360 return
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000361 names = []
362 try:
363 names = os.listdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200364 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000365 onerror(os.listdir, path, sys.exc_info())
366 for name in names:
367 fullname = os.path.join(path, name)
368 try:
369 mode = os.lstat(fullname).st_mode
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200370 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000371 mode = 0
372 if stat.S_ISDIR(mode):
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200373 _rmtree_unsafe(fullname, onerror)
Barry Warsaw234d9a92003-01-24 17:36:15 +0000374 else:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000375 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200376 os.unlink(fullname)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200377 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200378 onerror(os.unlink, fullname, sys.exc_info())
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000379 try:
380 os.rmdir(path)
Andrew Svetlovad28c7f2012-12-18 22:02:39 +0200381 except OSError:
Johannes Gijsbersef5ffc42004-10-31 12:05:31 +0000382 onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000383
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200384# Version using fd-based APIs to protect against races
385def _rmtree_safe_fd(topfd, path, onerror):
386 names = []
387 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200388 names = os.listdir(topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100389 except OSError as err:
390 err.filename = path
Hynek Schlawack2100b422012-06-23 20:28:32 +0200391 onerror(os.listdir, path, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200392 for name in names:
393 fullname = os.path.join(path, name)
394 try:
Hynek Schlawacka75cd1c2012-06-28 12:07:29 +0200395 orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200396 mode = orig_st.st_mode
Hynek Schlawackb5501102012-12-10 09:11:25 +0100397 except OSError:
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200398 mode = 0
399 if stat.S_ISDIR(mode):
400 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200401 dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100402 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200403 onerror(os.open, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200404 else:
405 try:
406 if os.path.samestat(orig_st, os.fstat(dirfd)):
407 _rmtree_safe_fd(dirfd, fullname, onerror)
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200408 try:
409 os.rmdir(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100410 except OSError:
Hynek Schlawack9f558cc2012-06-28 15:30:47 +0200411 onerror(os.rmdir, fullname, sys.exc_info())
Hynek Schlawackb5501102012-12-10 09:11:25 +0100412 else:
413 try:
414 # This can only happen if someone replaces
415 # a directory with a symlink after the call to
416 # stat.S_ISDIR above.
417 raise OSError("Cannot call rmtree on a symbolic "
418 "link")
419 except OSError:
420 onerror(os.path.islink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200421 finally:
422 os.close(dirfd)
423 else:
424 try:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200425 os.unlink(name, dir_fd=topfd)
Hynek Schlawackb5501102012-12-10 09:11:25 +0100426 except OSError:
Hynek Schlawack2100b422012-06-23 20:28:32 +0200427 onerror(os.unlink, fullname, sys.exc_info())
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200428
Hynek Schlawackd0f6e0a2012-06-29 08:28:20 +0200429_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
430 os.supports_dir_fd and
431 os.listdir in os.supports_fd and
432 os.stat in os.supports_follow_symlinks)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000433
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the function that actually failed (os.open).
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000488
Christian Heimesada8c3b2008-03-18 18:26:33 +0000489def _basename(path):
490 # A basename() variant which first strips the trailing slash, if present.
491 # Thus we always get the last component of the path, even for directories.
Serhiy Storchaka3a308b92014-02-11 10:30:59 +0200492 sep = os.path.sep + (os.path.altsep or '')
493 return os.path.basename(path.rstrip(sep))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000494
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Raises Error if the computed destination already exists, or if a
    directory would have to be copied into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Honour the documented contract: every successful move
            # returns the destination (here real_dst is still dst).
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then remove.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, real_dst, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
        else:
            copy_function(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000550
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000551def _destinsrc(src, dst):
Berker Peksag3715da52014-09-18 05:11:15 +0300552 src = os.path.abspath(src)
553 dst = os.path.abspath(dst)
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000554 if not src.endswith(os.path.sep):
555 src += os.path.sep
556 if not dst.endswith(os.path.sep):
557 dst += os.path.sep
558 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000559
560def _get_gid(name):
561 """Returns a gid, given a group name."""
562 if getgrnam is None or name is None:
563 return None
564 try:
565 result = getgrnam(name)
566 except KeyError:
567 result = None
568 if result is not None:
569 return result[2]
570 return None
571
572def _get_uid(name):
573 """Returns an uid, given a user name."""
574 if getpwnam is None or name is None:
575 return None
576 try:
577 result = getpwnam(name)
578 except KeyError:
579 result = None
580 if result is not None:
581 return result[2]
582 return None
583
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None.
    "bzip2" and "xz" are only accepted when the corresponding module
    could be imported; otherwise ValueError is raised.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    If 'dry_run' is true, nothing is created on disk; the would-be file
    name is still returned.  'logger', when given, receives info messages.
    'verbose' is accepted but not used by this function.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if _LZMA_SUPPORTED:
        tar_compression['xz'] = 'xz'
        compress_ext['xz'] = '.xz'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty archive_dir means the archive goes into the current
    # directory; there is nothing to create then (makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tar.add() filter: override ownership only for the ids that
        # could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
649
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip" and is written
    with the "zipfile" Python module using ZIP_DEFLATED compression.  Only
    regular files found while walking 'base_dir' are added, each stored
    under its normalized path.

    'verbose' is accepted but not used.  If 'dry_run' is true, nothing is
    created on disk.  If 'logger' is given, progress is reported through its
    info() method.

    Returns the name of the output zip file.
    """
    import zipfile

    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # An empty 'archive_dir' means the archive goes into the current
    # directory: there is nothing to create and os.makedirs('') would fail.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if not dry_run:
        with zipfile.ZipFile(zip_filename, "w",
                             compression=zipfile.ZIP_DEFLATED) as zf:
            for dirpath, dirnames, filenames in os.walk(base_dir):
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    if os.path.isfile(path):
                        zf.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)

    return zip_filename
686
# Registry of archive creators: maps a format name to a tuple
# (function, [(extra_arg_name, value), ...], description).
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
}

# The bzip2 and xz flavours are only offered when the matching compression
# module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (
        _make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file")

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS['xztar'] = (
        _make_tarball, [('compress', 'xz')], "xz'ed tar-file")
700
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
710
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap the value in a 1-tuple: a tuple passed by mistake would
        # otherwise be unpacked by the % operator and garble the message.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
731
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
734
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, such as
    "zip", "tar", "gztar", "bztar" or "xztar".  ValueError is raised if the
    format is not registered.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to the archiver for every format other
    than "zip".  'verbose' is accepted but not used.  'dry_run' and 'logger'
    are forwarded to the archiver.
    """
    # Resolve the format and build the archiver arguments before touching
    # the working directory, so that an unknown format cannot leave the
    # process inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: a relative name would otherwise be
        # interpreted relative to 'root_dir' after the chdir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000788
789
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
800
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of 'extensions' is already claimed by a
    registered format, and TypeError if 'function' is not callable.
    'extra_args' is accepted but not inspected.
    """
    # Map every extension already registered to the format that owns it.
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension in owners:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
817
818
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format and `extensions` is a list of the
    file extensions that belong to it.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise a ReadError exception when it cannot
    handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples that
    will be passed as arguments to the callable.  `description` describes
    the format and is what get_unpack_formats() reports for it.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
840
def unregister_unpack_format(name):
    """Remove the unpack format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
844
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created.  A `path` without any
    directory part refers to the current directory, so nothing is done.
    """
    dirname = os.path.dirname(path)
    # os.makedirs('') would fail, and exist_ok avoids a spurious error when
    # the directory appears between a check and the creation.
    if dirname:
        os.makedirs(dirname, exist_ok=True)
850
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Raises ReadError if `filename` is not a zip file.  Members whose name
    starts with '/' or contains '..' are skipped.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            name = member.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: creating its path above was enough
                continue

            # regular file: read the whole member, then write it out
            data = archive.read(member.filename)
            with open(target, 'wb') as f:
                f.write(data)
887
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`.

    Raises ReadError if `filename` cannot be opened as a tar archive.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    # NOTE(review): extractall() is given the member names as stored in the
    # archive; no path filtering is done here.
    with archive:
        archive.extractall(extract_dir)
900
# Registry of unpackers: maps a format name to a tuple
# (extensions, function, [(extra_arg_name, value), ...], description).
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# The bzip2 and xz flavours are only offered when the matching compression
# module could be imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (
        ['.tar.bz2', '.tbz2'], _unpack_tarfile, [], "bzip2'ed tar-file")

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS['xztar'] = (
        ['.tar.xz', '.txz'], _unpack_tarfile, [], "xz'ed tar-file")
914
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is none.
    """
    for name, info in _UNPACK_FORMATS.items():
        # str.endswith() accepts a tuple and tests every suffix in it
        if filename.endswith(tuple(info[0])):
            return name
    return None
921
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the name of a registered unpack format, such as "zip",
    "tar" or "gztar".  If it is given but not registered, a ValueError is
    raised.  If it is not given, the unpacker is chosen from the extension
    of `filename`; a ReadError is raised when no registered unpacker
    matches that extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200957
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200958
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', the amounts of total, used and free space in bytes, as
        computed from os.statvfs().
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        total = vfs.f_blocks * unit
        used = (vfs.f_blocks - vfs.f_bfree) * unit
        # 'free' is based on f_bavail while 'used' is based on f_bfree
        free = vfs.f_bavail * unit
        return _ntuple_diskusage(total, used, free)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', the amounts of total, used and free space in bytes, as
        computed from nt._getdiskusage().
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200991
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200992
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can each be a uid/gid or a user/group name; names are
    converted to the matching uid/gid.  At least one of them must be given,
    otherwise ValueError is raised.  LookupError is raised for a name that
    cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system user name; anything else is passed as is
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # anything that is not an int is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001023
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None or os lacks
            # get_terminal_size; ValueError: stdout is closed or detached;
            # OSError: stdout is not connected to a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001066
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate the command `cmd` and return its path, or None if no file
    conforming to `mode` is found.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    """
    def _access_check(fn, mode):
        # The file must exist and be accessible with the requested mode.
        # Directories are rejected as well, because on Windows they pass
        # the os.access check.
        return (os.path.exists(fn) and os.access(fn, mode)
                and not os.path.isdir(fn))

    # A command with a directory part is looked up directly instead of
    # being searched for on the path.  This includes names relative to the
    # current directory, e.g. ./script
    if os.path.dirname(cmd):
        return cmd if _access_check(cmd, mode) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # When cmd already ends with one of the extensions (e.g.
        # "python.exe") only that exact name is tried; otherwise every
        # extension is appended in turn.
        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # the same directory listed twice is only searched once
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _access_check(full_name, mode):
                return full_name
    return None