blob: a5da58790ecf8ec7d18d00c455145fec6b2d888f [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Georg Brandl2ee470f2008-07-16 12:55:28 +000010import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000011import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000012import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000013import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000014
# Optional compression back-ends.  Each module is imported only to find out
# whether it is available; the name is dropped again and a flag is kept.
try:
    import bz2
except ImportError:
    _BZ2_SUPPORTED = False
else:
    del bz2
    _BZ2_SUPPORTED = True

try:
    import lzma
except ImportError:
    _LZMA_SUPPORTED = False
else:
    del lzma
    _LZMA_SUPPORTED = True

# User/group database lookups.  When pwd or grp cannot be imported the
# corresponding lookup function is set to None.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000038
# Public names exported by a star-import of this module.
__all__ = [
    "copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
    "copytree", "move", "rmtree",
    "Error", "SpecialFileError", "ExecError",
    "make_archive", "get_archive_formats",
    "register_archive_format", "unregister_archive_format",
    "get_unpack_formats", "register_unpack_format",
    "unregister_unpack_format", "unpack_archive",
    "ignore_patterns", "chown", "which", "get_terminal_size",
    "SameFileError",
]
# disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000048
class Error(OSError):
    """Generic error raised by the copy and move helpers of this module."""


class SameFileError(Error):
    """Raised when source and destination are the same file."""


class SpecialFileError(OSError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) for which it is not supported."""


class ExecError(OSError):
    """Raised when a command could not be executed."""


class ReadError(OSError):
    """Raised when an archive cannot be read."""


class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails."""
68
69
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    The data is moved in pieces obtained from fsrc.read(length); copying
    stops as soon as a read returns an empty (falsy) result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
77
def _samefile(src, dst):
    """Return True if src and dst refer to the same file.

    Uses os.path.samefile() when the platform provides it (an OSError from
    that call counts as "not the same").  Otherwise the normalized absolute
    pathnames are compared.
    """
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # All other platforms: check for same pathname.
        normalized = [os.path.normcase(os.path.abspath(p)) for p in (src, dst)]
        return normalized[0] == normalized[1]

    # Macintosh, Unix.
    try:
        return samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000089
def copyfile(src, dst, *, follow_symlinks=True):
    """Copy data from src to dst and return dst.

    If follow_symlinks is not set and src is a symbolic link, a new
    symlink will be created instead of copying the file it points to.

    Raises SameFileError when src and dst are the same file, and
    SpecialFileError when either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise SameFileError("{!r} and {!r} are the same file".format(src, dst))

    for path in (src, dst):
        try:
            info = os.stat(path)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if not follow_symlinks and os.path.islink(src):
        # Recreate the link itself rather than copying its target.
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as reader, open(dst, 'wb') as writer:
        copyfileobj(reader, writer)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000118
def copymode(src, dst, *, follow_symlinks=True):
    """Copy mode bits from src to dst.

    If follow_symlinks is not set, symlinks aren't followed if and only
    if both `src` and `dst` are symlinks.  If `lchmod` isn't available
    (e.g. Linux) this method does nothing.

    """
    on_links = (not follow_symlinks
                and os.path.islink(src) and os.path.islink(dst))
    if on_links:
        read_stat, chmod_name = os.lstat, 'lchmod'
    else:
        read_stat, chmod_name = os.stat, 'chmod'

    apply_mode = getattr(os, chmod_name, None)
    if apply_mode is None:
        # The platform lacks the needed chmod variant: nothing to do.
        return

    apply_mode(dst, stat.S_IMODE(read_stat(src).st_mode))
139
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, *, follow_symlinks=True):
        """Copy extended filesystem attributes from `src` to `dst`.

        Attributes already present on `dst` are overwritten.

        If `follow_symlinks` is false, symlinks won't be followed.

        """
        try:
            attr_names = os.listxattr(src, follow_symlinks=follow_symlinks)
        except OSError as exc:
            # ENOTSUP / ENODATA while listing: silently copy nothing.
            if exc.errno in (errno.ENOTSUP, errno.ENODATA):
                return
            raise

        for attr in attr_names:
            try:
                payload = os.getxattr(src, attr,
                                      follow_symlinks=follow_symlinks)
                os.setxattr(dst, attr, payload,
                            follow_symlinks=follow_symlinks)
            except OSError as exc:
                # Skip attributes failing with one of these errno values.
                if exc.errno not in (errno.EPERM, errno.ENOTSUP,
                                     errno.ENODATA):
                    raise
else:
    def _copyxattr(*args, **kwargs):
        """Do nothing: os.listxattr is not available on this platform."""
        pass
166
def copystat(src, dst, *, follow_symlinks=True):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Extended attributes are copied too, through _copyxattr().

    If the optional flag `follow_symlinks` is not set, symlinks aren't
    followed if and only if both `src` and `dst` are symlinks.

    """
    def _nop(*args, ns=None, follow_symlinks=None):
        pass

    # follow symlinks (aka don't not follow symlinks)
    follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))

    def lookup(name):
        # When following links, use the real os function if it exists.
        # Otherwise use it only if it also supports follow_symlinks;
        # in every other case fall back to the do-nothing stub.
        candidate = getattr(os, name, _nop)
        if follow or candidate in os.supports_follow_symlinks:
            return candidate
        return _nop

    src_info = lookup("stat")(src, follow_symlinks=follow)
    mode = stat.S_IMODE(src_info.st_mode)
    lookup("utime")(dst, ns=(src_info.st_atime_ns, src_info.st_mtime_ns),
                    follow_symlinks=follow)
    try:
        lookup("chmod")(dst, mode, follow_symlinks=follow)
    except NotImplementedError:
        # The chmod call cannot honour the requested follow_symlinks
        # value on this platform, so the mode bits are left alone and
        # the error is suppressed.
        pass
    if hasattr(src_info, 'st_flags'):
        try:
            lookup("chflags")(dst, src_info.st_flags, follow_symlinks=follow)
        except OSError as why:
            # Only "operation not supported" style failures are tolerated;
            # either errno name may be missing on a given platform.
            tolerated = [getattr(errno, code)
                         for code in ('EOPNOTSUPP', 'ENOTSUP')
                         if hasattr(errno, code)]
            if why.errno not in tolerated:
                raise
    _copyxattr(src, dst, follow_symlinks=follow)
Antoine Pitrou424246f2012-05-12 19:02:01 +0200220
def copy(src, dst, *, follow_symlinks=True):
    """Copy data and mode bits ("cp src dst"). Return the file's destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    If source and destination are the same file, a SameFileError will be
    raised.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copymode(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000238
def copy2(src, dst, *, follow_symlinks=True):
    """Copy data and all stat info ("cp -p src dst"). Return the file's
    destination.

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    If follow_symlinks is false, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, follow_symlinks=follow_symlinks)
    copystat(src, target, follow_symlinks=follow_symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000254
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.  The returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        matched = set()
        for pat in patterns:
            matched.update(fnmatch.filter(names, pat))
        return matched
    return _ignore_patterns
266
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag applies to the whole tree,
    including nested directories. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(os.readlink(srcname), dstname)
                    copystat(srcname, dstname, follow_symlinks=not symlinks)
                else:
                    # Ignore dangling symlink if the flag is on.
                    # os.path.exists() follows the link, so it is false
                    # exactly when the link is broken.  Testing the raw
                    # link text instead would resolve a relative target
                    # against the current directory rather than against
                    # the directory containing the link.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur; the copy function will
                    # raise an error for a dangling link
                    if os.path.isdir(srcname):
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Forward every option so nested directories are treated
                # the same way as the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except OSError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows
        if getattr(why, 'winerror', None) is None:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000355
# version vulnerable to race conditions
def _rmtree_unsafe(path, onerror):
    """Remove the directory tree at `path` using path-based os calls.

    Every failing operation is reported as
    onerror(func, path, sys.exc_info()); removal then carries on with the
    remaining entries unless onerror itself raises.
    """
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except OSError:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except OSError:
            # An entry that cannot be examined is handled like a plain file.
            is_directory = False
        if is_directory:
            _rmtree_unsafe(child, onerror)
            continue
        try:
            os.unlink(child)
        except OSError:
            onerror(os.unlink, child, sys.exc_info())

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000388
# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror):
    """Remove everything inside the directory open as descriptor `topfd`.

    `path` is the directory's pathname and is used only to build the
    names handed to onerror(func, fullname, sys.exc_info()).  The
    directory itself is not removed and `topfd` is not closed here.
    """
    try:
        entries = os.listdir(topfd)
    except OSError as err:
        entries = []
        err.filename = path
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            entry_st = os.stat(entry, dir_fd=topfd, follow_symlinks=False)
            mode = entry_st.st_mode
        except OSError:
            mode = 0

        if not stat.S_ISDIR(mode):
            try:
                os.unlink(entry, dir_fd=topfd)
            except OSError:
                onerror(os.unlink, child, sys.exc_info())
            continue

        try:
            dirfd = os.open(entry, os.O_RDONLY, dir_fd=topfd)
        except OSError:
            onerror(os.open, child, sys.exc_info())
            continue

        try:
            # Recurse only if the opened object is the one stat'ed above.
            if os.path.samestat(entry_st, os.fstat(dirfd)):
                _rmtree_safe_fd(dirfd, child, onerror)
                try:
                    os.rmdir(entry, dir_fd=topfd)
                except OSError:
                    onerror(os.rmdir, child, sys.exc_info())
            else:
                try:
                    # This can only happen if someone replaces
                    # a directory with a symlink after the call to
                    # stat.S_ISDIR above.
                    raise OSError("Cannot call rmtree on a symbolic "
                                  "link")
                except OSError:
                    onerror(os.path.islink, child, sys.exc_info())
        finally:
            os.close(dirfd)
Hynek Schlawack67be92b2012-06-23 17:58:42 +0200433
# True when every os feature used by _rmtree_safe_fd() is available:
# dir_fd support for open/stat/unlink/rmdir, listdir() on a descriptor,
# and stat() with follow_symlinks.
_use_fd_functions = (
    {os.open, os.stat, os.unlink, os.rmdir}.issubset(os.supports_dir_fd)
    and os.listdir in os.supports_fd
    and os.stat in os.supports_follow_symlinks
)
Nick Coghlan5b0eca12012-06-24 16:43:06 +1000438
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is platform and implementation dependent;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    if _use_fd_functions:
        # While the unsafe rmtree works fine on bytes, the fd based does not.
        if isinstance(path, bytes):
            path = os.fsdecode(path)
        # Note: To guard against symlink races, we use the standard
        # lstat()/open()/fstat() trick.
        try:
            orig_st = os.lstat(path)
        except Exception:
            onerror(os.lstat, path, sys.exc_info())
            return
        try:
            fd = os.open(path, os.O_RDONLY)
        except Exception:
            # Report the call that actually failed (os.open, not os.lstat).
            onerror(os.open, path, sys.exc_info())
            return
        try:
            if os.path.samestat(orig_st, os.fstat(fd)):
                _rmtree_safe_fd(fd, path, onerror)
                try:
                    os.rmdir(path)
                except OSError:
                    onerror(os.rmdir, path, sys.exc_info())
            else:
                try:
                    # symlinks to directories are forbidden, see bug #1669
                    raise OSError("Cannot call rmtree on a symbolic link")
                except OSError:
                    onerror(os.path.islink, path, sys.exc_info())
        finally:
            os.close(fd)
    else:
        return _rmtree_unsafe(path, onerror)

# Allow introspection of whether or not the hardening against symlink
# attacks is supported on the current platform
rmtree.avoids_symlink_attacks = _use_fd_functions
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000493
def _basename(path):
    """Return the last component of `path`, ignoring trailing separators.

    Unlike os.path.basename(), this yields the directory name for a path
    that ends with a separator.
    """
    trailing = os.path.sep
    if os.path.altsep:
        trailing += os.path.altsep
    return os.path.basename(path.rstrip(trailing))
Christian Heimesada8c3b2008-03-18 18:26:33 +0000499
def move(src, dst, copy_function=copy2):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist, otherwise Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    The optional `copy_function` argument is a callable that will be used
    to copy the source or it will be delegated to `copytree`.
    By default, copy2() is used, but any function that supports the same
    signature (like copy()) can be used.

    Note that when dst is an existing directory that is the same file as
    src, only a rename is performed and None is returned.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # rename() failed: fall back to copy-then-delete.
        if os.path.islink(src):
            os.symlink(os.readlink(src), target)
            os.unlink(src)
        elif not os.path.isdir(src):
            copy_function(src, target)
            os.unlink(src)
        else:
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself"
                            " '%s'." % (src, dst))
            copytree(src, target, copy_function=copy_function,
                     symlinks=True)
            rmtree(src)
    return target
Brett Cannon1c3fa182004-06-19 21:11:35 +0000555
def _destinsrc(src, dst):
    """Return True if `dst` is `src` itself or lies somewhere below it.

    Both paths are made absolute and given a trailing separator before
    the prefix comparison, so '/a' is not treated as a parent of '/ab'.
    """
    def _as_dir_prefix(location):
        absolute = os.path.abspath(location)
        if absolute.endswith(os.path.sep):
            return absolute
        return absolute + os.path.sep

    src_prefix = _as_dir_prefix(src)
    dst_prefix = _as_dir_prefix(dst)
    return dst_prefix.startswith(src_prefix)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000564
def _get_gid(name):
    """Returns a gid, given a group name.

    None is returned when no name is given, when getgrnam is unavailable,
    or when the group is unknown.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
576
def _get_uid(name):
    """Returns an uid, given a user name.

    None is returned when no name is given, when getpwnam is unavailable,
    or when the user is unknown.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
588
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", "xz", or None;
    "bzip2" and "xz" are accepted only when the matching module could be
    imported.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    When 'dry_run' is true nothing is written; only the name is computed
    (and progress is reported to 'logger', if given).

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", ".bz2", or ".xz").

    Returns the output filename.
    """
    # Method name -> tarfile mode suffix / filename extension.
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}
    optional_methods = (
        (_BZ2_SUPPORTED, 'bzip2', 'bz2'),
        (_LZMA_SUPPORTED, 'xz', 'xz'),
    )
    for available, method, suffix in optional_methods:
        if available:
            tar_compression[method] = suffix
            compress_ext[method] = '.' + suffix

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if dry_run:
        return archive_name

    tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
    try:
        tar.add(base_dir, filter=_set_uid_gid)
    finally:
        tar.close()

    return archive_name
654
Tarek Ziadé396fad72010-02-23 05:30:31 +0000655def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
656 """Create a zip file from all the files under 'base_dir'.
657
Éric Araujo4433a5f2010-12-15 20:26:30 +0000658 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000659 "zipfile" Python module (if available) or the InfoZIP "zip" utility
660 (if installed and found on the default search path). If neither tool is
661 available, raises ExecError. Returns the name of the output zip
662 file.
663 """
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400664 import zipfile
665
Tarek Ziadé396fad72010-02-23 05:30:31 +0000666 zip_filename = base_name + ".zip"
667 archive_dir = os.path.dirname(base_name)
668
Serhiy Storchaka9a4fc192014-11-28 00:48:46 +0200669 if archive_dir and not os.path.exists(archive_dir):
Tarek Ziadé396fad72010-02-23 05:30:31 +0000670 if logger is not None:
671 logger.info("creating %s", archive_dir)
672 if not dry_run:
673 os.makedirs(archive_dir)
674
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400675 if logger is not None:
676 logger.info("creating '%s' and adding '%s' to it",
677 zip_filename, base_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000678
Andrew Kuchlinga0934b22014-03-20 16:11:16 -0400679 if not dry_run:
680 with zipfile.ZipFile(zip_filename, "w",
681 compression=zipfile.ZIP_DEFLATED) as zf:
682 for dirpath, dirnames, filenames in os.walk(base_dir):
683 for name in filenames:
684 path = os.path.normpath(os.path.join(dirpath, name))
685 if os.path.isfile(path):
686 zf.write(path, path)
687 if logger is not None:
688 logger.info("adding '%s'", path)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000689
690 return zip_filename
691
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple; extra_args is a list of
# (name, value) pairs that make_archive() passes to function as keywords.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
}

# The bzip2 and xz variants are only offered when their modules imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        bztar=(_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"))

if _LZMA_SUPPORTED:
    _ARCHIVE_FORMATS.update(
        xztar=(_make_tarball, [('compress', 'xz')], "xz'ed tar-file"))
705
def get_archive_formats():
    """Return the registered archive formats, sorted.

    The result is a list of (name, description) tuples, one per entry in
    the archive format registry.
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
715
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so that a tuple passed by mistake is reported
        # as such instead of being unpacked by the % operator.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
736
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no format is registered under that name.
    """
    del _ARCHIVE_FORMATS[name]
739
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "gztar",
    "bztar" or "xztar" (the last two only when the bz2 / lzma modules are
    available), or any other registered format.  An unknown format raises
    ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to the archiver for every format except
    "zip".  If 'dry_run' is true the working directory is not changed and
    'dry_run' is forwarded to the archiver.  'logger', when given, receives
    debug() messages and is forwarded as well.  'verbose' is unused here.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format) from None

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep pointing at the same
        # place once the working directory has changed.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000793
794
def get_unpack_formats():
    """Return the registered unpack formats, sorted.

    The result is a list of (name, extensions, description) tuples, one
    per entry in the unpack format registry.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
805
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of 'extensions' is already claimed by a
    registered format, and TypeError if 'function' is not callable.
    'extra_args' is accepted but not inspected.
    """
    # Map every extension that is already registered to its format name.
    claimed = {ext: fmt
               for fmt, info in _UNPACK_FORMATS.items()
               for ext in info[0]}

    for ext in extensions:
        if ext in claimed:
            raise RegistryError('%s is already registered for "%s"'
                                % (ext, claimed[ext]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
822
823
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format and `extensions` is a list of file
    name extensions that identify it.

    `function` is the callable used to unpack archives; it is called with
    the archive file name and the target directory.  It needs to raise a
    ReadError exception when it is unable to handle an archive.

    `extra_args`, if provided, is a sequence of (name, value) tuples that
    are passed to the callable as keyword arguments.  `description` is
    reported by get_unpack_formats().

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
845
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    del _UNPACK_FORMATS[name]
849
850def _ensure_directory(path):
851 """Ensure that the parent directory of `path` exists"""
852 dirname = os.path.dirname(path)
853 if not os.path.isdir(dirname):
854 os.makedirs(dirname)
855
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.  Members whose name is absolute or
    contains '..' are skipped.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            # Zip member names always use '/'; rebuild with the local
            # separator underneath extract_dir.
            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: creating its path above was enough
                continue

            payload = archive.read(member_name)
            with open(target, 'wb') as out:
                out.write(payload)
892
def _unpack_tarfile(filename, extract_dir):
    """Unpack the tar archive `filename` into `extract_dir`.

    tarfile.open() is left to detect the compression.  Raises ReadError
    if it cannot open `filename` as a tar file.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    # NOTE(review): members are extracted without any inspection here; the
    # tarfile documentation warns about untrusted archives.
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
905
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple; extra_args is a
# list of (name, value) pairs passed to function as keyword arguments.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# The bzip2 and xz variants are only offered when their modules imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS.update(
        bztar=(['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
               "bzip2'ed tar-file"))

if _LZMA_SUPPORTED:
    _UNPACK_FORMATS.update(
        xztar=(['.tar.xz', '.txz'], _unpack_tarfile, [],
               "xz'ed tar-file"))
919
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format one of whose
    extensions `filename` ends with, or None if there is no such format.
    """
    for fmt, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in info[0]):
            return fmt
    return None
926
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the name of a registered unpack format, such as "zip",
    "tar" or "gztar".  If it is given but not registered, ValueError is
    raised.  If it is not provided, the format is chosen from the
    extension of `filename`; ReadError is raised when no registered
    unpacker claims that extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # No explicit format: look the extension up in the registry.
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200962
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200963
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total',
        'used' and 'free'.  Each is a block count reported by os.statvfs()
        multiplied by its f_frsize field.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(total=vfs.f_blocks * unit,
                                 used=(vfs.f_blocks - vfs.f_bfree) * unit,
                                 free=vfs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total',
        'used' and 'free'.  'total' and 'free' come from
        nt._getdiskusage(); 'used' is their difference.
        """
        capacity, available = nt._getdiskusage(path)
        return _ntuple_diskusage(total=capacity,
                                 used=capacity - available,
                                 free=available)
Sandro Tosid902a142011-08-22 23:28:27 +0200996
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200997
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    user may be a user name (str), which is resolved to a uid, or any
    other value, which is handed to os.chown() unchanged.  group may be a
    gid (int), or any other value, which is resolved as a group name.

    Raises ValueError if neither user nor group is given, and LookupError
    if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +01001028
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, because sys.__stdout__ is
    missing, closed or detached, or because we are not connected to a
    terminal, the value given in fallback parameter is used. Fallback
    defaults to (80, 24) which is the default size used by many terminal
    emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None, or this platform has
            # no os.get_terminal_size; ValueError: the stream is closed or
            # detached; OSError: the descriptor is not a terminal.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -05001071
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate the command `cmd` and return its path, or None.

    A file qualifies when it exists, passes os.access() with `mode`
    (os.F_OK | os.X_OK by default) and is not a directory.

    `path` is a search path string whose entries are separated by
    os.pathsep.  It defaults to the PATH environment variable, or to
    os.defpath when PATH is unset.  If `cmd` has a directory part it is
    checked directly and the search path is not consulted.
    """
    def _usable(candidate, mode):
        # Directories are rejected explicitly because on Windows they
        # pass the os.access check.
        if not os.path.exists(candidate):
            return False
        if not os.access(candidate, mode):
            return False
        return not os.path.isdir(candidate)

    # A command with a directory part (this includes relative forms such
    # as ./script) is looked up as given.
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    if path is None:
        path = os.environ.get("PATH", os.defpath)
    if not path:
        return None
    search_dirs = path.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in search_dirs:
            search_dirs.insert(0, os.curdir)

        # A command that already ends with one of the PATHEXT suffixes
        # (e.g. "python.exe") is tried as is; otherwise each suffix is
        # appended in turn.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        lowered = cmd.lower()
        if any(lowered.endswith(ext.lower()) for ext in pathext):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in pathext]
    else:
        # Other platforms have no PATHEXT equivalent: use cmd unchanged.
        candidates = [cmd]

    visited = set()
    for directory in search_dirs:
        key = os.path.normcase(directory)
        if key in visited:
            # this directory was already searched
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None