blob: 4b752627fa5d018c6fd8731a3e0f30233c9e6284 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
18 _BZ2_SUPPORTED = True
19except ImportError:
20 _BZ2_SUPPORTED = False
21
22try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000023 from pwd import getpwnam
24except ImportError:
25 getpwnam = None
26
27try:
28 from grp import getgrnam
29except ImportError:
30 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000031
# Names exported by "from shutil import *".  chown() is a public, documented
# function defined in this module, so it is exported as well.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive",
           "ignore_patterns", "chown"]
    # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000040
class Error(EnvironmentError):
    """Base error raised by the high-level file operations in this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000043
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)."""
47
class ExecError(EnvironmentError):
    """Raised when a command could not be executed."""
50
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read."""
53
class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails."""
57
58
# Make the name WindowsError usable everywhere in this module: where the
# name is not defined (NameError), bind it to None so callers can test
# "WindowsError is not None" before using it in isinstance().
try:
    WindowsError
except NameError:
    WindowsError = None
63
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    The data is read with fsrc.read(length) and written with fdst.write()
    one chunk at a time, until a read returns an empty (false) result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
71
def _samefile(src, dst):
    """Return true if src and dst refer to the same file.

    Uses os.path.samefile() where it exists (an OSError from it is treated
    as "not the same file"); otherwise the case-normalized absolute
    pathnames are compared.
    """
    if not hasattr(os.path, 'samefile'):
        # All other platforms: check for same pathname.
        def _canonical(pathname):
            return os.path.normcase(os.path.abspath(pathname))
        return _canonical(src) == _canonical(dst)

    # Macintosh, Unix.
    try:
        return os.path.samefile(src, dst)
    except OSError:
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000083
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error when src and dst are the same file, and SpecialFileError
    when either of them is an existing named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for pathname in (src, dst):
        try:
            file_mode = os.stat(pathname).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(file_mode):
            raise SpecialFileError("`%s` is a named pipe" % pathname)

    with open(src, 'rb') as reader:
        with open(dst, 'wb') as writer:
            copyfileobj(reader, writer)
Guido van Rossumc6360141990-10-13 19:23:40 +0000103
def copymode(src, dst):
    """Copy the permission mode bits from src to dst.

    Does nothing on platforms where os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    permission_bits = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permission_bits)
Guido van Rossumc6360141990-10-13 19:23:40 +0000110
def copystat(src, dst):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    Each piece is only copied when the platform provides the matching os
    function.  An OSError from os.chflags() is ignored only when its errno
    is EOPNOTSUPP; any other failure propagates.
    """
    src_stat = os.stat(src)
    if hasattr(os, 'utime'):
        os.utime(dst, (src_stat.st_atime, src_stat.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, stat.S_IMODE(src_stat.st_mode))
    if not (hasattr(os, 'chflags') and hasattr(src_stat, 'st_flags')):
        return
    try:
        os.chflags(dst, src_stat.st_flags)
    except OSError as why:
        flags_unsupported = (hasattr(errno, 'EOPNOTSUPP') and
                             why.errno == errno.EOPNOTSUPP)
        if not flags_unsupported:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000126
def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    """
    target = dst
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000137
def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory, in which case the file is copied
    into it under the base name of src.

    """
    target = dst
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000148
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() ignore parameter.

    patterns are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        return set(name
                   for pattern in patterns
                   for name in fnmatch.filter(names, pattern))
    return _ignore_patterns
160
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files (or directories) pointed to by
    symbolic links are copied.  If the file pointed to by the symlink
    doesn't exist, an exception will be added to the list of errors raised
    in an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception.  The flag is applied to the whole tree,
    including sub-directories.  Notice that this has no effect on platforms
    that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            is_link = os.path.islink(srcname)
            if is_link and symlinks:
                os.symlink(os.readlink(srcname), dstname)
            elif (is_link and ignore_dangling_symlinks and
                  not os.path.exists(srcname)):
                # Dangling symlink and the flag is on: skip it.
                # os.path.exists() follows the link, so a relative target
                # is resolved against the link's own directory rather than
                # the current working directory.
                continue
            elif os.path.isdir(srcname):
                # Real directories, and symlinks to directories when
                # symlinks is false, are copied recursively.  The flag is
                # passed on so that it applies to the whole tree.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types,
                # and an error for a dangling symlink that is not ignored.
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append (not extend) so the reason stays one 3-tuple, like
            # every other entry in the list
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000242
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir
    (or os.path.islink when path is a symbolic link); path is the argument
    to that function that caused it to fail; and exc_info is a tuple
    returned by sys.exc_info().  If ignore_errors is false and onerror is
    None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())
    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # Treat entries that cannot be examined as plain files.
            is_directory = False
        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000290
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000291
def _basename(path):
    """Return the last component of path, ignoring trailing separators."""
    # Stripping the trailing separator(s) first means that a directory path
    # such as "a/b/" still yields "b" instead of an empty string.
    trimmed = path.rstrip(os.path.sep)
    return os.path.basename(trimmed)
296
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist, otherwise Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is copied to
    the destination and then removed; a directory cannot be moved into
    itself this way (Error is raised).
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)
    try:
        os.rename(src, target)
    except OSError:
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
        else:
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, target, symlinks=True)
            rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000336
def _destinsrc(src, dst):
    """Return true if the absolute path of dst lies in (or equals) src."""
    separator = os.path.sep
    src_dir = abspath(src)
    dst_dir = abspath(dst)
    # Terminate both paths with a separator so that "/a/bc" is not
    # mistaken for a path inside "/a/b".
    if not src_dir.endswith(separator):
        src_dir += separator
    if not dst_dir.endswith(separator):
        dst_dir += separator
    return dst_dir.startswith(src_dir)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000345
def _get_gid(name):
    """Returns a gid, given a group name.

    Returns None when name is None, when getgrnam is unavailable, or when
    the group is unknown.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    if entry is None:
        return None
    return entry[2]
357
def _get_uid(name):
    """Returns a uid, given a user name.

    Returns None when name is None, when getpwnam is unavailable, or when
    the user is unknown.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    if entry is None:
        return None
    return entry[2]
369
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module is available), or None.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    If 'dry_run' is true, nothing is created on disk.  'logger', if given,
    receives info() messages.  'verbose' is accepted but not used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory part; there is then
    # nothing to create (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
431
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning the external "zip"
    command.

    "zip" is run with "-r" when verbose is true and "-rq" otherwise.
    Raises ExecError if the command could not be run successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is passed to ExecError;
        # applying % to the exception object raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
448
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.

    If 'dry_run' is true, nothing is written with the zipfile module.
    'logger', if given, receives info() messages.

    Returns the name of the output zip file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # archive_dir is '' when base_name has no directory part; there is then
    # nothing to create (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            archive = zipfile.ZipFile(zip_filename, "w",
                                      compression=zipfile.ZIP_DEFLATED)
            # Close the archive even if adding a file fails.
            try:
                for dirpath, dirnames, filenames in os.walk(base_dir):
                    for name in filenames:
                        path = os.path.normpath(os.path.join(dirpath, name))
                        if os.path.isfile(path):
                            archive.write(path, path)
                            if logger is not None:
                                logger.info("adding '%s'", path)
            finally:
                archive.close()

    return zip_filename
495
# Registry of archive formats:
#   name -> (function, extra_args as (name, value) pairs, description)
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file")
    }

# 'bztar' is only registered when the bz2 module could be imported;
# _make_tarball() rejects compress='bzip2' otherwise.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")
506
def get_archive_formats():
    """Returns a sorted list of the formats registered for archiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
516
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-element tuple or
    list.
    """
    if extra_args is None:
        extra_args = []
    # callable() is used instead of collections.Callable, which is no
    # longer available from the collections module on recent Pythons.
    if not callable(function):
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
537
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    del _ARCHIVE_FORMATS[name]
540
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar", or any other registered format.  An unknown format raises
    ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.
    """
    # Look the format up before touching the working directory, so that an
    # unknown format cannot leave the process inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep pointing at the same
        # place after the chdir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000594
595
def get_unpack_formats():
    """Returns a sorted list of the formats registered for unpacking.

    Each element of the returned sequence is a tuple
    (name, extensions, description)
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
606
def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker.

    Raises RegistryError if one of `extensions` is already claimed by a
    registered unpack format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not checked here.
    """
    # first make sure no other unpacker is registered for this extension
    existing_extensions = {}
    for name, info in _UNPACK_FORMATS.items():
        for ext in info[0]:
            existing_extensions[ext] = name

    for extension in extensions:
        if extension in existing_extensions:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension,
                                       existing_extensions[extension]))

    # callable() is used instead of collections.Callable, which is no
    # longer available from the collections module on recent Pythons.
    if not callable(function):
        raise TypeError('The registered function must be a callable')
623
624
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Registers an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be used to unpack archives. It is
    called with the archive filename and the extraction directory. If it's
    unable to handle an archive, it needs to raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as keyword arguments to the callable.
    `description` can be provided to describe the format, and will be
    returned by the get_unpack_formats() function.
    """
    checked_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, checked_args)
    _UNPACK_FORMATS[name] = (extensions, function, checked_args, description)
646
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no format is registered under that name.
    """
    del _UNPACK_FORMATS[name]
650
def _ensure_directory(path):
    """Create the parent directory of `path` if it is not a directory yet."""
    parent = os.path.dirname(path)
    if os.path.isdir(parent):
        return
    os.makedirs(parent)
656
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            destination = os.path.join(extract_dir, *member_name.split('/'))
            if not destination:
                continue

            _ensure_directory(destination)
            if member_name.endswith('/'):
                # directory entry: only its parent path had to be created
                continue

            # file
            payload = archive.read(member.filename)
            with open(destination, 'wb') as output:
                output.write(payload)
            del payload
    finally:
        archive.close()
693
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`.

    Raises ReadError if tarfile cannot open `filename`.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        message = "%s is not a compressed or uncompressed tar file" % filename
        raise ReadError(message)
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
706
# Registry of unpack formats:
#   name -> (extensions, function, extra_args as (name, value) pairs,
#            description)
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file")
    }

# 'bztar' is only registered when the bz2 module could be imported.  It is
# tied to tarball extensions rather than to a bare '.bz2', so that a
# bzip2-compressed file that is not a tar archive is not claimed by it.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
716
def _find_unpack_format(filename):
    """Return the name of a registered unpack format having an extension
    that `filename` ends with, or None if there is no such format."""
    for format_name, format_info in _UNPACK_FORMATS.items():
        if any(filename.endswith(suffix) for suffix in format_info[0]):
            return format_name
    return None
723
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", "gztar" or "bztar",
    or any other registered format.  If not provided, unpack_archive will
    use the filename extension and see if an unpacker was registered for
    that extension.

    Raises ValueError if `format` is given but is not a registered format,
    and ReadError if `format` is not given and no unpacker is registered for
    the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # format_info is (extensions, function, extra_args, description)
    func = format_info[1]
    func(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200759
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200760
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        total_bytes = vfs.f_blocks * unit
        used_bytes = (vfs.f_blocks - vfs.f_bfree) * unit
        free_bytes = vfs.f_bavail * unit
        return _ntuple_diskusage(total_bytes, used_bytes, free_bytes)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total_bytes, free_bytes = nt._getdiskusage(path)
        return _ntuple_diskusage(total_bytes, total_bytes - free_bytes,
                                 free_bytes)
Sandro Tosid902a142011-08-22 23:28:27 +0200793
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that
    case, they are converted to their respective uid/gid.  Whichever of the
    two is None is left unchanged.

    Raises ValueError if both user and group are None, and LookupError if
    a given name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    # user can either be an int (the uid) or a string (the system username)
    elif isinstance(user, str):
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        uid = user

    if group is None:
        gid = -1
    elif not isinstance(group, int):
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))
    else:
        gid = group

    os.chown(path, uid, gid)
823 os.chown(path, _user, _group)