blob: 468ffcb5b7cc2d177e8dd9b8d47539a76543fb10 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
20except ImportError:
21 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
25except ImportError:
26 getpwnam = None
27
28try:
29 from grp import getgrnam
30except ImportError:
31 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
Tarek Ziadéc3399782010-02-23 05:39:18 +000033__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
34 "copytree", "move", "rmtree", "Error", "SpecialFileError",
35 "ExecError", "make_archive", "get_archive_formats",
Tarek Ziadé6ac91722010-04-28 17:51:36 +000036 "register_archive_format", "unregister_archive_format",
37 "get_unpack_formats", "register_unpack_format",
Éric Araujoc5efe652011-08-21 14:30:00 +020038 "unregister_unpack_format", "unpack_archive",
Éric Araujo0ac4a5d2011-09-01 08:31:51 +020039 "ignore_patterns", "chown"]
Éric Araujoe4d5b8e2011-08-08 16:51:11 +020040 # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(EnvironmentError):
    """Generic failure raised by the copy and move helpers of this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file, such as a named pipe, that does not support it."""
48
class ExecError(EnvironmentError):
    """Raised when an external command could not be run."""
51
class ReadError(EnvironmentError):
    """Raised when an archive could not be read."""
54
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking format
    registries fails."""
58
59
# The WindowsError builtin is not defined on every platform.  Bind the
# name to None where it is missing so that later code can test
# "WindowsError is not None" before using it in isinstance().
try:
    WindowsError
except NameError:
    WindowsError = None
64
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` until fsrc.read()
    returns an empty (false) value.
    """
    while True:
        chunk = fsrc.read(length)
        if chunk:
            fdst.write(chunk)
        else:
            return
72
def _samefile(src, dst):
    """Return True if src and dst designate the same file.

    Uses os.path.samefile() where the platform provides it (an OSError
    from it counts as "not the same"); otherwise compares the
    case-normalized absolute pathnames.
    """
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # No samefile() on this platform: fall back to a pathname check.
        left = os.path.normcase(os.path.abspath(src))
        right = os.path.normcase(os.path.abspath(dst))
        return left == right

    try:
        return samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000084
def copyfile(src, dst):
    """Copy the data of the file src into the file dst.

    Raises Error when src and dst are the same file, and SpecialFileError
    when either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
Guido van Rossumc6360141990-10-13 19:23:40 +0000104
def copymode(src, dst):
    """Copy the permission bits of src onto dst.

    Does nothing on platforms where os.chmod is unavailable.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
Guido van Rossumc6360141990-10-13 19:23:40 +0000111
def copystat(src, dst):
    """Copy the stat info (mode bits, atime, mtime, flags) of src to dst.

    Each piece is only copied when the platform offers the matching os
    function.  An OSError from os.chflags() is swallowed only when its
    errno is EOPNOTSUPP; any other failure is re-raised.
    """
    info = os.stat(src)
    if hasattr(os, 'utime'):
        os.utime(dst, (info.st_atime, info.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, stat.S_IMODE(info.st_mode))
    if not (hasattr(os, 'chflags') and hasattr(info, 'st_flags')):
        return
    try:
        os.chflags(dst, info.st_flags)
    except OSError as why:
        has_code = hasattr(errno, 'EOPNOTSUPP')
        if not (has_code and why.errno == errno.EOPNOTSUPP):
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000127
def copy(src, dst):
    """Copy file data and mode bits, like "cp src dst".

    When dst is a directory, the file is created inside it under the
    basename of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000138
def copy2(src, dst):
    """Copy file data and all stat info, like "cp -p src dst".

    When dst is a directory, the file is created inside it under the
    basename of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000149
def ignore_patterns(*patterns):
    """Build a callable usable as the `ignore` argument of copytree().

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns
161
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files (or directories) pointed to
    by symbolic links are copied. If the file pointed by the symlink
    doesn't exist, an exception will be added in the list of errors
    raised in an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    os.symlink(os.readlink(srcname), dstname)
                    continue
                # os.path.exists() follows the link itself, so a relative
                # target is resolved against the link's directory rather
                # than against the current working directory.
                if ignore_dangling_symlinks and not os.path.exists(srcname):
                    continue
                # otherwise fall through and copy what the link points to;
                # a dangling link makes copy_function raise, which is
                # recorded in `errors` below.
            if os.path.isdir(srcname):
                # Forward every option so nested directories behave the
                # same as the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One entry per failure: keep the triple together.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000243
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    With ignore_errors set, failures are silently skipped.  Otherwise,
    if onerror is given it is called as onerror(func, path, exc_info),
    where func is the function that failed (os.path.islink, os.listdir,
    os.remove or os.rmdir), path is the argument it was given and
    exc_info is the tuple returned by sys.exc_info().  With neither
    option, the exception propagates to the caller.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # Cannot stat it: treat it as a plain file and try to remove it.
            is_directory = False
        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000291
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000292
def _basename(path):
    """Return the last component of path, ignoring trailing separators.

    Unlike os.path.basename(), this also yields the directory name for
    paths such as 'a/b/'.  Both os.path.sep and, where the platform
    defines one, os.path.altsep are stripped, so 'a\\b/' works on Windows.
    """
    separators = os.path.sep + (os.path.altsep or '')
    return os.path.basename(path.rstrip(separators))
297
def move(src, dst):
    """Recursively move a file or directory to another location, much
    like the Unix "mv" command.

    If dst is an existing directory (or a symlink to one), src is moved
    inside it; an Error is raised when an entry with src's basename
    already exists there.  The one exception is when src and dst are the
    same file (e.g. a case-only rename on a case-insensitive filesystem),
    in which case a plain rename is performed.

    If dst exists but is not a directory, it may be overwritten
    depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is copied
    to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
            return
        if _destinsrc(src, dst):
            raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        copytree(src, target, symlinks=True)
        rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000337
def _destinsrc(src, dst):
    """Return True if dst lies inside src (or is src itself).

    Both paths are made absolute and given a trailing separator first so
    that '/a/bc' is not mistaken for a child of '/a/b'.
    """
    sep = os.path.sep
    parent = abspath(src)
    child = abspath(dst)
    if not parent.endswith(sep):
        parent = parent + sep
    if not child.endswith(sep):
        child = child + sep
    return child.startswith(parent)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000346
def _get_gid(name):
    """Return the gid for group `name`, or None if it cannot be resolved.

    None is returned when no group database is available, when `name`
    is None, or when the group is unknown.
    """
    if getgrnam is None or name is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    if entry is None:
        return None
    return entry[2]
358
def _get_uid(name):
    """Return the uid for user `name`, or None if it cannot be resolved.

    None is returned when no password database is available, when `name`
    is None, or when the user is unknown.
    """
    if getpwnam is None or name is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    if entry is None:
        return None
    return entry[2]
370
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module could be imported), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, or if the name cannot be
    resolved, the ids recorded by tarfile are left untouched.

    'verbose' is accepted for interface compatibility and is not used.
    With 'dry_run' set nothing is written to disk.  'logger', if given,
    receives info() messages about the steps taken.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty dirname means "current directory": nothing to create, and
    # os.makedirs('') would raise.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids we resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
432
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` with the external "zip" program.

    The command is run through distutils.spawn.spawn(), recursively
    ("-r") and quietly ("-rq") unless `verbose` is true.  Raises
    ExecError if spawning fails.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted before the exception is built;
        # applying % to the exception instance raises TypeError.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
449
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.  With 'dry_run' set nothing is written.
    'logger', if given, receives info() messages.  Returns the name of the
    output zip file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # An empty dirname means "current directory": nothing to create, and
    # os.makedirs('') would raise.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            # The with block closes the archive even if a write fails.
            with zipfile.ZipFile(zip_filename, "w",
                                 compression=zipfile.ZIP_DEFLATED) as archive:
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            archive.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)

    return zip_filename
496
# Registry of archive creators, keyed by format name.  Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (keyword, value) pairs passed to the function by make_archive().
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }

# 'bztar' is only offered when the bz2 module could be imported; otherwise
# _make_tarball would reject compress='bzip2' at archive time.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")
507
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
517
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format under `name`.

    `function` is the callable used to create archives of that format.
    `extra_args`, if given, is a list or tuple of (name, value) pairs
    that are passed to the callable as keyword arguments.  `description`
    is the text reported by get_archive_formats().

    Raises TypeError if `function` is not callable or `extra_args` is
    not a sequence of two-item sequences.
    """
    args = [] if extra_args is None else extra_args
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for item in args:
        is_pair = isinstance(item, (tuple, list)) and len(item) == 2
        if not is_pair:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, args, description)
538
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
541
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar) and return its name.

    'base_name' is the name of the file to create, minus any
    format-specific extension; 'format' is the name of a registered
    archive format such as "zip", "tar", "bztar" or "gztar".  An unknown
    format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we chdir into 'root_dir' before creating the archive
    (unless 'dry_run' is set) and chdir back afterwards.  'base_dir' is
    the directory where we start archiving from; ie. 'base_dir' will be
    the common prefix of all files and directories in the archive.  It
    defaults to the current directory.

    'owner' and 'group' are passed on to the archiver for every format
    except "zip".  'logger', if given, receives debug() messages about
    the directory changes and is also handed to the archiver.
    """
    original_cwd = os.getcwd()
    change_dir = root_dir is not None
    if change_dir:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve before chdir so the archive still lands where asked.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    archiver = format_info[0]
    options = {'dry_run': dry_run, 'logger': logger}
    for option, value in format_info[1]:
        options[option] = value

    if format != 'zip':
        options['owner'] = owner
        options['group'] = group

    try:
        filename = archiver(base_name, base_dir, **options)
    finally:
        if change_dir:
            if logger is not None:
                logger.debug("changing back to '%s'", original_cwd)
            os.chdir(original_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000595
596
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
607
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # map every extension already taken to the format that owns it
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension not in owners:
            continue
        msg = '%s is already registered for "%s"'
        raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
624
625
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of filename extensions handled by the format.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise ReadError when it cannot handle it.

    `extra_args`, if given, is a sequence of (name, value) tuples passed
    to the callable as keyword arguments.  `description` is the text
    reported by get_unpack_formats().

    The arguments are validated by _check_unpack_options() first.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
647
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]
651
def _ensure_directory(path):
    """Create the parent directory of `path` if it is not there yet."""
    parent = os.path.dirname(path)
    if os.path.isdir(parent):
        return
    os.makedirs(parent)
657
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Members whose name is absolute or contains '..' are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: creating its path above was enough
                continue

            payload = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(payload)
    finally:
        archive.close()
694
def _unpack_tarfile(filename, extract_dir):
    """Unpack the tar archive `filename` into `extract_dir`.

    The archive is opened with tarfile.open() in its default mode.
    Raises ReadError if tarfile.open() fails with a TarError.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    else:
        try:
            archive.extractall(extract_dir)
        finally:
            archive.close()
707
# Registry of unpackers, keyed by format name.  Each value is an
# (extensions, function, extra_args, description) tuple; unpack_archive()
# matches a filename against `extensions` when no format is given.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

# Only offered when the bz2 module could be imported.  '.tbz2' mirrors the
# '.tgz' short form of gztar; the generic '.bz2' suffix is kept so that
# names accepted before are still recognized.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2', '.bz2'],
                                _unpack_tarfile, [], "bzip2'ed tar-file")
717
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format one of whose
    extensions `filename` ends with, or None if there is none."""
    for format_name, info in _UNPACK_FORMATS.items():
        extensions = info[0]
        if any(filename.endswith(ext) for ext in extensions):
            return format_name
    return None
724
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    Raises ValueError if `format` is given but not registered, and
    ReadError if `format` is omitted and no registered unpacker matches
    the filename extension.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is not None:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))
    else:
        # we need to look at the registered unpackers supported extensions
        detected = _find_unpack_format(filename)
        if detected is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[detected]

    # format_info is (extensions, function, extra_args, description);
    # extra_args are (name, value) pairs handed over as keyword arguments.
    func = format_info[1]
    func(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200760
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200761
# disk_usage() is only defined, and only exported through __all__, on
# platforms that offer a way to query it: os.statvfs, or the nt module.
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total',
        'used' and 'free', computed from os.statvfs() block counts
        multiplied by the fragment size f_frsize.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        total = vfs.f_blocks * unit
        used = (vfs.f_blocks - vfs.f_bfree) * unit
        free = vfs.f_bavail * unit
        return _ntuple_diskusage(total, used, free)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total',
        'used' and 'free'; total and free come from nt._getdiskusage()
        and used is their difference.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200794
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200795
def chown(path, user=None, group=None):
    """Change the owner user and/or group of the given path.

    `user` may be a uid or a user name (a str); `group` may be a gid (an
    int) or a group name.  Names are converted to their uid/gid.

    Raises ValueError if neither `user` nor `group` is given, and
    LookupError if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is taken to be the system username
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
825 os.chown(path, _user, _group)