blob: d1b1af3246af27de78aaf30a78f6ba78b42eb940 [file] [log] [blame]
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
18 _BZ2_SUPPORTED = True
19except ImportError:
20 _BZ2_SUPPORTED = False
21
22try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000023 from pwd import getpwnam
24except ImportError:
25 getpwnam = None
26
27try:
28 from grp import getgrnam
29except ImportError:
30 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000031
Tarek Ziadéc3399782010-02-23 05:39:18 +000032__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
33 "copytree", "move", "rmtree", "Error", "SpecialFileError",
34 "ExecError", "make_archive", "get_archive_formats",
Tarek Ziadé6ac91722010-04-28 17:51:36 +000035 "register_archive_format", "unregister_archive_format",
36 "get_unpack_formats", "register_unpack_format",
Éric Araujo5fa8e7a2011-08-21 14:29:18 +020037 "unregister_unpack_format", "unpack_archive", "ignore_patterns"]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000038
class Error(EnvironmentError):
    """Generic failure raised by the copy/move helpers in this module.

    copytree() raises it with a list of collected failures as its single
    argument; copyfile() and move() raise it with a plain message.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000041
class SpecialFileError(EnvironmentError):
    """Raised when an operation (for example a copy) is attempted on a
    special file, such as a named pipe, that does not support it."""
45
class ExecError(EnvironmentError):
    """Raised when an external command could not be run."""
48
class ReadError(EnvironmentError):
    """Raised when an archive file cannot be read or recognized."""
51
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
55
56
# Make sure the name ``WindowsError`` is always bound in this module.
# Where the interpreter does not define it, bind it to None so that callers
# can test ``WindowsError is not None`` before using it with isinstance().
try:
    WindowsError
except NameError:
    WindowsError = None
61
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` units, as returned by
    fsrc.read(length), until a read returns an empty (falsy) chunk.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
69
Johannes Gijsbers46f14592004-08-14 13:30:02 +000070def _samefile(src, dst):
71 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000072 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000073 try:
74 return os.path.samefile(src, dst)
75 except OSError:
76 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000077
78 # All other platforms: check for same pathname.
79 return (os.path.normcase(os.path.abspath(src)) ==
80 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000081
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    Raises Error if src and dst are the same file, and SpecialFileError if
    either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # The file most likely does not exist; nothing to check.
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
Guido van Rossumc6360141990-10-13 19:23:40 +0000101
def copymode(src, dst):
    """Copy the permission bits of src onto dst.

    Does nothing on platforms where os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    permission_bits = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permission_bits)
Guido van Rossumc6360141990-10-13 19:23:40 +0000108
def copystat(src, dst):
    """Copy stat information (atime, mtime, mode bits, flags) from src to dst.

    Each piece is only copied when the platform offers the matching os
    function. A failure of os.chflags() with errno EOPNOTSUPP is ignored;
    any other OSError from it propagates.
    """
    info = os.stat(src)
    permission_bits = stat.S_IMODE(info.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (info.st_atime, info.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, permission_bits)
    if hasattr(os, 'chflags') and hasattr(info, 'st_flags'):
        try:
            os.chflags(dst, info.st_flags)
        except OSError as why:
            tolerated = (hasattr(errno, 'EOPNOTSUPP') and
                         why.errno == errno.EOPNOTSUPP)
            if not tolerated:
                raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000124
def copy(src, dst):
    """Copy file data and permission bits, like "cp src dst".

    If dst names a directory, the file is copied into it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000135
def copy2(src, dst):
    """Copy file data and all stat info, like "cp -p src dst".

    If dst names a directory, the file is copied into it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000146
def ignore_patterns(*patterns):
    """Build a callable suitable for the copytree() `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any pattern."""
    def _ignore_patterns(path, names):
        matched = set()
        for glob in patterns:
            matched.update(fnmatch.filter(names, glob))
        return matched
    return _ignore_patterns
158
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    os.symlink(os.readlink(srcname), dstname)
                else:
                    # os.path.exists() follows the link, so a relative
                    # target is resolved against the link's own directory
                    # rather than the current working directory.
                    if (ignore_dangling_symlinks and
                            not os.path.exists(srcname)):
                        continue
                    # otherwise let the copy occur; the copy function will
                    # raise an error for a dangling link
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate every option, including
                # ignore_dangling_symlinks, to the sub-directories.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            details = err.args[0] if err.args else None
            if isinstance(details, list):
                # A nested copytree() reports a list of failure tuples.
                errors.extend(details)
            else:
                # A plain-message Error (e.g. "same file") must be recorded
                # as one entry, not spread character by character.
                errors.append((srcname, dstname, str(err)))
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One failure is one tuple in the error list.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000240
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete the directory tree rooted at path.

    When ignore_errors is true, failures are silently skipped. Otherwise,
    if onerror is given, it is called as onerror(func, path, exc_info) for
    each failure, where func is os.path.islink, os.listdir, os.remove or
    os.rmdir, path is the argument that made func fail and exc_info is the
    tuple returned by sys.exc_info(). With ignore_errors false and onerror
    None, the failing exception propagates.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Called from inside an except block: re-raise that exception.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except OSError:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except OSError:
            # Cannot stat it: treat it as a plain file and try to remove it.
            is_directory = False
        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except OSError:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000288
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000289
Christian Heimesada8c3b2008-03-18 18:26:33 +0000290def _basename(path):
291 # A basename() variant which first strips the trailing slash, if present.
292 # Thus we always get the last component of the path, even for directories.
293 return os.path.basename(path.rstrip(os.path.sep))
294
def move(src, dst):
    """Recursively move a file or directory to another location, similar
    to the Unix "mv" command.

    If dst is an existing directory (or a symlink to one), src is moved
    inside it; an Error is raised if an entry with the same base name
    already exists there. If dst and src are the same file, a plain
    os.rename(src, dst) is performed instead.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first. If it fails with OSError, src is copied to
    the destination (copytree() for directories, copy2() for files) and
    then removed. Moving a directory into itself raises Error.
    A lot more could be done here... A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # Rename failed: fall back to copy followed by delete.
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
            return
        if _destinsrc(src, dst):
            raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        copytree(src, target, symlinks=True)
        rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000334
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000335def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000336 src = abspath(src)
337 dst = abspath(dst)
338 if not src.endswith(os.path.sep):
339 src += os.path.sep
340 if not dst.endswith(os.path.sep):
341 dst += os.path.sep
342 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000343
344def _get_gid(name):
345 """Returns a gid, given a group name."""
346 if getgrnam is None or name is None:
347 return None
348 try:
349 result = getgrnam(name)
350 except KeyError:
351 result = None
352 if result is not None:
353 return result[2]
354 return None
355
356def _get_uid(name):
357 """Returns an uid, given a user name."""
358 if getpwnam is None or name is None:
359 return None
360 try:
361 result = getpwnam(name)
362 except KeyError:
363 result = None
364 if result is not None:
365 return result[2]
366 return None
367
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module could be imported), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'verbose' is accepted for signature compatibility but is not used here.
    When 'dry_run' is true, nothing is written to disk. If 'logger' is
    given, progress is reported through logger.info().

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # Reject unknown or unsupported compression names up front.
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory component; the
    # archive then goes into the current directory and os.makedirs('')
    # must not be attempted.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
429
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by spawning an external "zip".

    The command is run with "-r" when verbose is true and "-rq" otherwise;
    dry_run is forwarded to distutils' spawn(). Raises ExecError if the
    command cannot be run successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    # NOTE(review): distutils is deprecated by PEP 632 and absent from newer
    # Python releases -- confirm the supported interpreter range.
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is handed to ExecError;
        # applying % to the exception object raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
446
447def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
448 """Create a zip file from all the files under 'base_dir'.
449
Éric Araujo4433a5f2010-12-15 20:26:30 +0000450 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000451 "zipfile" Python module (if available) or the InfoZIP "zip" utility
452 (if installed and found on the default search path). If neither tool is
453 available, raises ExecError. Returns the name of the output zip
454 file.
455 """
456 zip_filename = base_name + ".zip"
457 archive_dir = os.path.dirname(base_name)
458
459 if not os.path.exists(archive_dir):
460 if logger is not None:
461 logger.info("creating %s", archive_dir)
462 if not dry_run:
463 os.makedirs(archive_dir)
464
465 # If zipfile module is not available, try spawning an external 'zip'
466 # command.
467 try:
468 import zipfile
469 except ImportError:
470 zipfile = None
471
472 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000473 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000474 else:
475 if logger is not None:
476 logger.info("creating '%s' and adding '%s' to it",
477 zip_filename, base_dir)
478
479 if not dry_run:
480 zip = zipfile.ZipFile(zip_filename, "w",
481 compression=zipfile.ZIP_DEFLATED)
482
483 for dirpath, dirnames, filenames in os.walk(base_dir):
484 for name in filenames:
485 path = os.path.normpath(os.path.join(dirpath, name))
486 if os.path.isfile(path):
487 zip.write(path, path)
488 if logger is not None:
489 logger.info("adding '%s'", path)
490 zip.close()
491
492 return zip_filename
493
# Registry of archive creators, keyed by format name. Each value is a
# (function, extra_args, description) tuple, where extra_args is a list of
# (keyword, value) pairs passed to the function by make_archive().
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
}

# The bzip2 variant is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")
503
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a (name, description) tuple.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
513
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format under `name`.

    `function` is the callable used to create archives. `extra_args`, when
    given, is a list or tuple of (name, value) pairs that are passed to the
    callable as keyword arguments. `description` is reported by
    get_archive_formats().

    Raises TypeError if `function` is not callable or `extra_args` is not a
    list/tuple of two-element lists/tuples.
    """
    args = [] if extra_args is None else extra_args
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for pair in args:
        is_pair = isinstance(pair, (tuple, list)) and len(pair) == 2
        if not is_pair:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, args, description)
534
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
537
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar) and return its name.

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, such as
    "zip", "tar", "bztar" or "gztar". An unknown format raises ValueError.

    'root_dir' is the directory that becomes the root of the archive: when
    given, 'base_name' is made absolute and (unless 'dry_run' is true) the
    process changes into 'root_dir' while the archive is built, then changes
    back. 'base_dir' is the directory where archiving starts from, i.e. the
    common prefix of all files in the archive; it defaults to the current
    directory.

    'owner' and 'group' are passed to every format except "zip". By
    default, the current owner and group are used.
    """
    original_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    archiver = format_info[0]
    options = {'dry_run': dry_run, 'logger': logger}
    # Format-specific (name, value) pairs registered with the format.
    for option, value in format_info[1]:
        options[option] = value

    if format != 'zip':
        options['owner'] = owner
        options['group'] = group

    try:
        filename = archiver(base_name, base_dir, **options)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", original_cwd)
            os.chdir(original_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000591
592
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a
    (name, extensions, description) tuple.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
603
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # Map every extension already claimed to the format that owns it.
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension in owners:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
620
621
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` is a list of file name extensions handled by the format.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise ReadError when it cannot handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples that
    are passed to the callable as keyword arguments. `description` is
    reported by get_unpack_formats().

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
643
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
647
648def _ensure_directory(path):
649 """Ensure that the parent directory of `path` exists"""
650 dirname = os.path.dirname(path)
651 if not os.path.isdir(dirname):
652 os.makedirs(dirname)
653
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Members whose name starts with "/" or contains ".." are skipped.
    Raises ReadError if zipfile cannot be imported or `filename` is not a
    zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: only its parents needed creating
                continue

            payload = archive.read(member.filename)
            with open(target, 'wb') as output:
                output.write(payload)
    finally:
        archive.close()
689 zip.close()
690
691def _unpack_tarfile(filename, extract_dir):
692 """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
693 """
694 try:
695 tarobj = tarfile.open(filename)
696 except tarfile.TarError:
697 raise ReadError(
698 "%s is not a compressed or uncompressed tar file" % filename)
699 try:
700 tarobj.extractall(extract_dir)
701 finally:
702 tarobj.close()
703
# Registry of unpackers, keyed by format name. Each value is an
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# The bzip2 variant is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
713
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
720
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    A ValueError is raised when an explicit `format` is not registered;
    a ReadError is raised when no format is given and none can be found
    for the file name.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = format_info[1]
    extra_kwargs = dict(format_info[2])
    unpacker(filename, extract_dir, **extra_kwargs)
755 func(filename, extract_dir, **kwargs)