blob: 8da46d15a452a30f3a23d473f92d7b5797324b9b [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
20except ImportError:
21 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
25except ImportError:
26 getpwnam = None
27
28try:
29 from grp import getgrnam
30except ImportError:
31 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Names exported by a star-import of this module.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown", "which"]
    # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(EnvironmentError):
    """Base error raised by the copy and move helpers in this module.

    copytree() raises it with a list of collected failures as its single
    argument; copyfile() and move() raise it with a message string.
    """
    pass
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe).

    copyfile() raises it when either path is a FIFO.
    """
48
class ExecError(EnvironmentError):
    """Raised when a command could not be executed.

    Used by the external "zip" fallback when spawning the utility fails.
    """
51
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read.

    Unpack callables are expected to raise it for archives they cannot
    handle (see register_unpack_format()).
    """
54
class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails (e.g. an extension is already taken)."""
58
59
try:
    WindowsError
except NameError:
    # The name is not defined on this platform.  Binding it to None lets
    # callers write "WindowsError is not None and isinstance(exc,
    # WindowsError)" without a NameError (see copytree()).
    WindowsError = None
64
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is read with fsrc.read(length) and written with fdst.write() one
    chunk at a time, until a read returns an empty (false) value.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
72
def _samefile(src, dst):
    """Return True if `src` and `dst` appear to refer to the same file.

    Uses os.path.samefile() where the platform provides it (an OSError from
    it is reported as "not the same file"); otherwise falls back to
    comparing the normalized absolute path names.
    """
    if not hasattr(os.path, 'samefile'):
        # All other platforms: check for same pathname.
        norm_src = os.path.normcase(os.path.abspath(src))
        norm_dst = os.path.normcase(os.path.abspath(dst))
        return norm_src == norm_dst

    # Macintosh, Unix.
    try:
        return os.path.samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000084
def copyfile(src, dst, symlinks=False):
    """Copy the contents of the file `src` to the file `dst`; return `dst`.

    Raises Error if both paths refer to the same file, and SpecialFileError
    if either of them is a named pipe.

    If the optional flag `symlinks` is set and `src` is a symbolic link, a
    new symlink is created at `dst` instead of copying the file it points
    to.

    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return dst

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
    return dst
Guido van Rossumc6360141990-10-13 19:23:40 +0000113
def copymode(src, dst, symlinks=False):
    """Copy the permission bits of `src` onto `dst`.

    When `symlinks` is set and both `src` and `dst` are symbolic links, the
    mode of the links themselves is copied via os.lchmod(); if `lchmod`
    isn't available (eg. Linux) nothing is done.  In every other case the
    links are followed and os.chmod() is used, if the platform has it.

    """
    both_links = symlinks and os.path.islink(src) and os.path.islink(dst)
    if both_links:
        if not hasattr(os, 'lchmod'):
            return
        read_stat, apply_mode = os.lstat, os.lchmod
    elif hasattr(os, 'chmod'):
        read_stat, apply_mode = os.stat, os.chmod
    else:
        return

    apply_mode(dst, stat.S_IMODE(read_stat(src).st_mode))
134
def copystat(src, dst, symlinks=False):
    """Copy mode bits, atime, mtime and (where present) flags from src to dst.

    If the optional flag `symlinks` is set and both `src` and `dst` are
    symbolic links, the l* variants of the os functions are used so the
    links themselves are affected.  Any os function the platform lacks is
    silently replaced by a no-op.

    """
    def _noop(*args, ns=None):
        pass

    if symlinks and os.path.islink(src) and os.path.islink(dst):
        read_stat = os.lstat
        setter_names = ('lutimes', 'lchmod', 'lchflags')
    else:
        read_stat = os.stat
        setter_names = ('utime', 'chmod', 'chflags')
    set_times, set_mode, set_flags = [getattr(os, attr, _noop)
                                      for attr in setter_names]

    info = read_stat(src)
    set_times(dst, ns=(info.st_atime_ns, info.st_mtime_ns))
    set_mode(dst, stat.S_IMODE(info.st_mode))
    if not hasattr(info, 'st_flags'):
        return
    try:
        set_flags(dst, info.st_flags)
    except OSError as why:
        # A "not supported" error from chflags is tolerated; anything else
        # propagates.  Not every platform defines both errno names.
        tolerated = [getattr(errno, code)
                     for code in ('EOPNOTSUPP', 'ENOTSUP')
                     if hasattr(errno, code)]
        if why.errno not in tolerated:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000169
if hasattr(os, 'listxattr'):
    def _copyxattr(src, dst, symlinks=False):
        """Copy extended filesystem attributes from `src` to `dst`.

        Every attribute listed on `src` is written to `dst`, overwriting an
        existing attribute of the same name.  An OSError whose errno is
        EPERM, ENOTSUP or ENODATA is ignored for the attribute concerned;
        any other error propagates.

        If the optional flag `symlinks` is set, symlinks won't be followed
        (the l* variants of the os functions are used).

        """
        if symlinks:
            listxattr = os.llistxattr
            setxattr = os.lsetxattr
            getxattr = os.lgetxattr
        else:
            listxattr = os.listxattr
            setxattr = os.setxattr
            getxattr = os.getxattr

        for attr in listxattr(src):
            try:
                setxattr(dst, attr, getxattr(src, attr))
            except OSError as e:
                if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
                    raise
else:
    # No extended-attribute support in os on this platform: do nothing.
    def _copyxattr(*args, **kwargs):
        pass
199
def copy(src, dst, symlinks=False):
    """Copy file data and mode bits ("cp src dst"); return the path written.

    If `dst` is an existing directory, the file is copied into it under the
    base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copymode(src, target, symlinks=symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000214
def copy2(src, dst, symlinks=False):
    """Copy file data and all stat info ("cp -p src dst"); return the path
    written.

    If `dst` is an existing directory, the file is copied into it under the
    base name of `src`.  Extended attributes are copied as well where the
    platform supports them.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copystat(src, target, symlinks=symlinks)
    _copyxattr(src, target, symlinks=symlinks)
    return target
Guido van Rossumc6360141990-10-13 19:23:40 +0000231
def ignore_patterns(*patterns):
    """Build a callable suitable for the `ignore` parameter of copytree().

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
243
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree and return the destination.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files (or directories) pointed to by
    symbolic links are copied.  If the file pointed to by the symlink
    doesn't exist, an exception will be added to the list of errors raised
    in an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception.  Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable.  If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied.  It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file.  It will be called with the source path and the
    destination path as arguments.  By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, symlinks=symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.  Testing
                    # `srcname` (exists() follows the link) resolves a
                    # relative target against the link's own directory;
                    # testing `linkto` would resolve it against the cwd.
                    if (not os.path.exists(srcname)
                            and ignore_dangling_symlinks):
                        continue
                    if os.path.isdir(srcname):
                        # The link points to a directory: copy its contents,
                        # copy_function only handles files.
                        copytree(srcname, dstname, symlinks, ignore,
                                 copy_function, ignore_dangling_symlinks)
                    else:
                        # Otherwise let the copy occur; for a dangling link
                        # copy_function will raise an error.
                        copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One (src, dst, message) entry, like every other failure.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
    return dst
Guido van Rossumd7673291998-02-06 21:38:09 +0000330
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete the directory tree rooted at `path`.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, the exception is re-raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except block below, so a bare
            # raise re-raises the error being handled.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_dir = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # Cannot stat it: treat it as a non-directory and let
            # os.remove() report the problem.
            is_dir = False
        if is_dir:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000378
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000379
def _basename(path):
    """Return the last component of `path`, ignoring trailing separators.

    Unlike os.path.basename(), this yields the directory name for a path
    that ends with os.path.sep.
    """
    trimmed = path.rstrip(os.path.sep)
    return os.path.basename(trimmed)
384
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command. Return the file or directory's
    destination.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist; otherwise Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    Error is also raised when a directory would be moved into itself.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            # Return the destination here too, as on every other path.
            return real_dst

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (e.g. across filesystems): copy, then delete.
        if os.path.islink(src):
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
    return real_dst
Brett Cannon1c3fa182004-06-19 21:11:35 +0000433
def _destinsrc(src, dst):
    """Return True if `dst` is `src` itself or lies inside it.

    Both paths are made absolute and terminated with os.path.sep before
    the prefix comparison, so "/a" is not considered a parent of "/ab".
    """
    sep = os.path.sep
    src_dir = abspath(src)
    dst_dir = abspath(dst)
    if not src_dir.endswith(sep):
        src_dir = src_dir + sep
    if not dst_dir.endswith(sep):
        dst_dir = dst_dir + sep
    return dst_dir.startswith(src_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000442
def _get_gid(name):
    """Return the gid for group `name`.

    Returns None when `name` is None, when the group database lookup
    function is unavailable, or when no such group exists.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
454
def _get_uid(name):
    """Return the uid for user `name`.

    Returns None when `name` is None, when the password database lookup
    function is unavailable, or when no such user exists.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    return None if entry is None else entry[2]
466
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module is available), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").  Its parent
    directory is created if missing.  With 'dry_run' set, nothing is written.
    'verbose' is accepted but not used here.

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # dirname() is '' when the archive goes into the current directory;
    # there is nothing to create then (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for ids that resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
528
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` by spawning the "zip" utility.

    The utility is run recursively ("-r"), and quietly ("-rq") unless
    `verbose` is true.  `dry_run` is forwarded to the spawn helper.

    Raises ExecError if spawning the command fails.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* the exception is built;
        # applying % to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
545
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  The parent directory of the archive is
    created if missing.  With 'dry_run' set, nothing is written.

    Returns the name of the output zip file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # dirname() is '' when the archive goes into the current directory;
    # there is nothing to create then (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            # The with-block closes the archive even if a write fails.
            with zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED) as zf:
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            zf.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)

    return zip_filename
592
# Registry of archive formats, keyed by format name.  Each value is a
# (function, extra_args, description) tuple: `function` builds the archive,
# `extra_args` is a list of (name, value) pairs passed to it as keyword
# arguments, and `description` is reported by get_archive_formats().
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file")
    }

# bzip2-compressed tarballs are only offered when bz2 could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")
602
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, entry[2])
                  for name, entry in _ARCHIVE_FORMATS.items())
612
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-item tuple or list.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        # Wrap in a 1-tuple so that a tuple-valued `function` is formatted
        # as a whole instead of being unpacked as several % arguments.
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
633
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
636
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, e.g.
    "zip", "tar", "bztar" or "gztar".  An unknown format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed on to every format other than "zip".
    By default, the current owner and group are used.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep pointing at the same
        # place once the working directory changes.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000690
691
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, entry[0], entry[3])
                  for name, entry in _UNPACK_FORMATS.items())
702
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # Map every extension already taken to the format that owns it.
    owners = {ext: fmt_name
              for fmt_name, fmt_info in _UNPACK_FORMATS.items()
              for ext in fmt_info[0]}

    for extension in extensions:
        if extension not in owners:
            continue
        msg = '%s is already registered for "%s"'
        raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
719
720
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of file extensions handled by the format.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise ReadError when it cannot handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples that
    are passed as arguments to the callable.  `description` is free text
    reported by get_unpack_formats().

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
742
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError when no format is registered under that name.
    """
    _UNPACK_FORMATS.pop(name)
746
747def _ensure_directory(path):
748 """Ensure that the parent directory of `path` exists"""
749 dirname = os.path.dirname(path)
750 if not os.path.isdir(dirname):
751 os.makedirs(dirname)
752
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` below `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped
    silently.  Raises ReadError when the zipfile module cannot be imported
    or when `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: its path was created above, no data
                continue

            payload = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(payload)
789
790def _unpack_tarfile(filename, extract_dir):
791 """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
792 """
793 try:
794 tarobj = tarfile.open(filename)
795 except tarfile.TarError:
796 raise ReadError(
797 "%s is not a compressed or uncompressed tar file" % filename)
798 try:
799 tarobj.extractall(extract_dir)
800 finally:
801 tarobj.close()
802
# Registry of unpack formats.  Each value is a tuple
# (extensions, unpack function, extra (name, value) arguments, description).
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

if _BZ2_SUPPORTED:
    # Register the tar-specific suffixes, mirroring 'gztar'.  A bare '.bz2'
    # suffix would also claim bzip2 files that are not tar archives.
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
812
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None when no format matches."""
    for fmt_name, spec in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in spec[0]):
            return fmt_name
    return None
819
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the name of a registered unpack format, such as "zip",
    "tar" or "gztar". If not provided, the format is looked up from the
    extension of `filename` among the registered unpackers.

    Raises ValueError when an explicit `format` is not registered, and
    ReadError when no registered extension matches `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    extra_args = dict(format_info[2])
    unpacker(filename, extract_dir, **extra_args)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200855
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200856
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', giving the amount of total, used and free space in bytes.
        """
        vfs = os.statvfs(path)
        block_size = vfs.f_frsize
        # 'free' is taken from f_bavail, while 'used' is derived from
        # f_bfree, so the two need not add up to 'total'.
        return _ntuple_diskusage(
            total=vfs.f_blocks * block_size,
            used=(vfs.f_blocks - vfs.f_bfree) * block_size,
            free=vfs.f_bavail * block_size)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with attributes 'total', 'used' and
        'free', giving the amount of total, used and free space in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +0200889
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200890
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    `user` and `group` may each be a numeric id or a name; names are
    resolved to the matching uid/gid first.  At least one of them must be
    given.

    Raises ValueError when both are None, and LookupError when a name
    cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is the system username
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        # anything else is passed through as the uid
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # every non-int value is looked up as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +0100921
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values; 0 means "not known yet"
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None or
            #   os.get_terminal_size is unavailable;
            # ValueError: the stream is closed or detached;
            # OSError: the descriptor is not connected to a terminal.
            size = os.terminal_size(fallback)
        # A queried dimension of 0 is not usable either, so fall back
        # for that dimension as well.
        if columns <= 0:
            columns = size.columns or fallback[0]
        if lines <= 0:
            lines = size.lines or fallback[1]

    return os.terminal_size((columns, lines))
Brian Curtinc57a3452012-06-22 16:00:30 -0500964
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Given a command, a mode, and a search path string, return the path
    of the file that conforms to the given mode, or None if there is none.

    `mode` defaults to os.F_OK | os.X_OK.  `path` is a string of
    directories separated by os.pathsep; when it is not given (or empty)
    the PATH environment variable is used, falling back to os.defpath.

    When `cmd` contains a directory component it is checked directly and
    the search path is not consulted.
    """
    # Check that a given file can be accessed with the correct mode.
    # Additionally check that the file is not a directory, as on Windows
    # directories pass the os.access check.
    def _access_check(fn, mode):
        return (os.path.exists(fn) and os.access(fn, mode)
                and not os.path.isdir(fn))

    # Only a command with a directory part is looked up directly.  A bare
    # name must be resolved through the search path, otherwise a file that
    # merely sits in the current directory would be reported as found.
    if os.path.dirname(cmd):
        return cmd if _access_check(cmd, mode) else None

    path = (path or os.environ.get("PATH", os.defpath)).split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in path:
            path.insert(0, os.curdir)

        # PATHEXT is necessary to check on Windows.
        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
        # If cmd already ends with one of the extensions (e.g. "python.exe")
        # only that name is tested, otherwise every extension is tried.
        lowered_cmd = cmd.lower()
        if any(lowered_cmd.endswith(ext.lower()) for ext in pathext):
            files = [cmd]
        else:
            files = [cmd + ext.lower() for ext in pathext]
    else:
        # On other platforms you don't have things like PATHEXT to tell you
        # what file suffixes are executable, so just pass on cmd as-is.
        files = [cmd]

    seen = set()
    for directory in path:
        # The normalized form is used only to skip duplicates; the result
        # is built from the directory exactly as it was given.
        normdir = os.path.normcase(directory)
        if normdir in seen:
            continue
        seen.add(normdir)
        for thefile in files:
            name = os.path.join(directory, thefile)
            if _access_check(name, mode):
                return name
    return None