blob: 601d9c2b06ed1cd3f2e42f816382a3af031728a0 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
18 _BZ2_SUPPORTED = True
19except ImportError:
20 _BZ2_SUPPORTED = False
21
22try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000023 from pwd import getpwnam
24except ImportError:
25 getpwnam = None
26
27try:
28 from grp import getgrnam
29except ImportError:
30 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000031
# Names exported by a star-import of this module.  ignore_patterns() is a
# public helper meant to be handed to copytree(ignore=...), so it is
# exported alongside the functions it is used with.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive", "ignore_patterns"]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000038
class Error(EnvironmentError):
    """Error raised by copyfile(), copytree() and move().

    copytree() raises it with a single argument: the list of
    (srcname, dstname, reason) tuples collected while copying.
    """
Guido van Rossumc6360141990-10-13 19:23:40 +000041
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file, such as a named pipe, that does not support it."""
45
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
48
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read or its format is not
    recognised."""
51
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
55
56
# WindowsError is only available as a builtin on Windows.  Where the name
# does not exist, bind it to None at module level so that code such as
# copytree() can guard its isinstance() check with "is not None".
try:
    WindowsError
except NameError:
    WindowsError = None
61
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` (bytes or
    characters, depending on the objects) until fsrc.read() returns an
    empty value.  Neither object is closed or rewound.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
69
Johannes Gijsbers46f14592004-08-14 13:30:02 +000070def _samefile(src, dst):
71 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000072 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000073 try:
74 return os.path.samefile(src, dst)
75 except OSError:
76 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000077
78 # All other platforms: check for same pathname.
79 return (os.path.normcase(os.path.abspath(src)) ==
80 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000081
def copyfile(src, dst):
    """Copy the data of the file named src to the file named dst.

    dst is opened for binary writing, so an existing file is truncated
    and overwritten.  Only the contents are copied; no mode bits or
    other metadata.

    Raises Error if src and dst are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for fn in [src, dst]:
        try:
            st = os.stat(fn)
        except OSError:
            # File most likely does not exist
            pass
        else:
            # XXX What about other special files? (sockets, devices...)
            if stat.S_ISFIFO(st.st_mode):
                raise SpecialFileError("`%s` is a named pipe" % fn)

    # The with-blocks guarantee that fsrc is closed even when closing
    # fdst itself fails (e.g. the final flush hits a full disk).
    with open(src, 'rb') as fsrc:
        with open(dst, 'wb') as fdst:
            copyfileobj(fsrc, fdst)
Guido van Rossumc6360141990-10-13 19:23:40 +0000108
def copymode(src, dst):
    """Copy the permission bits of src onto dst.

    Does nothing on platforms where os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
Guido van Rossumc6360141990-10-13 19:23:40 +0000115
def copystat(src, dst):
    """Copy stat information from src to dst.

    Copies, where the platform supports each operation: access and
    modification times, permission bits, and file flags.  An OSError
    from os.chflags() is ignored only when its errno is EOPNOTSUPP;
    any other failure propagates.
    """
    info = os.stat(src)
    permissions = stat.S_IMODE(info.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (info.st_atime, info.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, permissions)
    if not (hasattr(os, 'chflags') and hasattr(info, 'st_flags')):
        return
    try:
        os.chflags(dst, info.st_flags)
    except OSError as exc:
        # The destination may simply not support flags; that is fine.
        if hasattr(errno, 'EOPNOTSUPP') and exc.errno == errno.EOPNOTSUPP:
            return
        raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000131
def copy(src, dst):
    """Copy file data and permission bits (like "cp src dst").

    If dst is an existing directory, the file is created inside it
    under the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000142
def copy2(src, dst):
    """Copy file data and all stat info (like "cp -p src dst").

    If dst is an existing directory, the file is created inside it
    under the base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000153
def ignore_patterns(*patterns):
    """Build a callable usable as the copytree() `ignore` argument.

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them."""
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns
165
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.  If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception; the flag also applies to every
    sub-directory that is copied.  Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable.  If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied.  It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file.  It will be called with the source path and the
    destination path as arguments.  By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    os.symlink(os.readlink(srcname), dstname)
                else:
                    # os.path.exists() follows the link, so it is false
                    # exactly when the link dangles.  Testing the raw
                    # link text instead would resolve a relative target
                    # against the current directory rather than against
                    # the directory holding the link.
                    if (ignore_dangling_symlinks and
                            not os.path.exists(srcname)):
                        continue
                    # otherwise let the copy occur; the copy function
                    # will raise an error for a dangling link
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append, not extend: every entry of `errors` must stay a
            # single (src, dst, message) tuple
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000247
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete the directory tree rooted at path.

    Error handling: if ignore_errors is true, failures are silently
    skipped.  Otherwise, if onerror is given, it is called as
    onerror(func, path, exc_info) where func is the function that
    failed (os.path.islink, os.listdir, os.remove or os.rmdir), path is
    the argument it failed on and exc_info is the tuple returned by
    sys.exc_info().  With ignore_errors false and onerror None the
    exception propagates to the caller.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # only ever called from inside an except block, so a bare
            # raise re-raises the error being handled
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())
    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # cannot stat it: treat it as a plain file and try to remove it
            is_directory = False
        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000295
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000296
Christian Heimesada8c3b2008-03-18 18:26:33 +0000297def _basename(path):
298 # A basename() variant which first strips the trailing slash, if present.
299 # Thus we always get the last component of the path, even for directories.
300 return os.path.basename(path.rstrip(os.path.sep))
301
def move(src, dst):
    """Recursively move a file or directory to another location, much
    like the Unix "mv" command.

    If dst is an existing directory (or a symlink to one), src is moved
    inside it under its own base name; that resulting path must not
    already exist, otherwise Error is raised.

    If dst already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first.  If it fails with OSError, src is
    copied to the destination (copytree() with symlinks=True for a
    directory, copy2() for a file) and then removed.  Moving a
    directory into itself raises Error.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)
    try:
        os.rename(src, target)
    except OSError:
        # rename() failed: fall back to copy + delete
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
            return
        if _destinsrc(src, dst):
            raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        copytree(src, target, symlinks=True)
        rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000335
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000336def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000337 src = abspath(src)
338 dst = abspath(dst)
339 if not src.endswith(os.path.sep):
340 src += os.path.sep
341 if not dst.endswith(os.path.sep):
342 dst += os.path.sep
343 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000344
345def _get_gid(name):
346 """Returns a gid, given a group name."""
347 if getgrnam is None or name is None:
348 return None
349 try:
350 result = getgrnam(name)
351 except KeyError:
352 result = None
353 if result is not None:
354 return result[2]
355 return None
356
357def _get_uid(name):
358 """Returns an uid, given a user name."""
359 if getpwnam is None or name is None:
360 return None
361 try:
362 result = getpwnam(name)
363 except KeyError:
364 result = None
365 if result is not None:
366 return result[2]
367 return None
368
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module could be imported), or None.  Any other value raises
    ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").  Its parent
    directory is created if missing.

    If 'dry_run' is true, nothing is written or created.  'verbose' is
    accepted but not used by this function.  'logger', when given,
    receives info() messages.

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # reject compression names we cannot map to a tarfile mode
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory part; there is
    # nothing to create then (and os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: stamp the requested owner/group on each member
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
429
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` with the external "zip" command.

    The command is run recursively ("-r"), and quietly ("-rq") unless
    `verbose` is true.  `dry_run` is forwarded to distutils' spawn().

    Raises ExecError if spawn() reports a DistutilsExecError.
    """
    # XXX see if we want to keep an external call here
    # NOTE(review): this relies on distutils, which newer Python releases
    # no longer ship -- confirm the supported interpreter range.
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is handed to
        # ExecError; applying % to the exception object is a TypeError.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
446
447def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
448 """Create a zip file from all the files under 'base_dir'.
449
450 The output zip file will be named 'base_dir' + ".zip". Uses either the
451 "zipfile" Python module (if available) or the InfoZIP "zip" utility
452 (if installed and found on the default search path). If neither tool is
453 available, raises ExecError. Returns the name of the output zip
454 file.
455 """
456 zip_filename = base_name + ".zip"
457 archive_dir = os.path.dirname(base_name)
458
459 if not os.path.exists(archive_dir):
460 if logger is not None:
461 logger.info("creating %s", archive_dir)
462 if not dry_run:
463 os.makedirs(archive_dir)
464
465 # If zipfile module is not available, try spawning an external 'zip'
466 # command.
467 try:
468 import zipfile
469 except ImportError:
470 zipfile = None
471
472 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000473 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000474 else:
475 if logger is not None:
476 logger.info("creating '%s' and adding '%s' to it",
477 zip_filename, base_dir)
478
479 if not dry_run:
480 zip = zipfile.ZipFile(zip_filename, "w",
481 compression=zipfile.ZIP_DEFLATED)
482
483 for dirpath, dirnames, filenames in os.walk(base_dir):
484 for name in filenames:
485 path = os.path.normpath(os.path.join(dirpath, name))
486 if os.path.isfile(path):
487 zip.write(path, path)
488 if logger is not None:
489 logger.info("adding '%s'", path)
490 zip.close()
491
492 return zip_filename
493
# Registry of archive creators, keyed by format name.  Each value is
# (function, extra_args, description) where extra_args is a sequence of
# (name, value) pairs passed to the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }

# 'bztar' is only offered when the bz2 module could be imported;
# otherwise _make_tarball() would reject its 'bzip2' compression.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")
504
def get_archive_formats():
    """Return the registered archive formats, sorted.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
514
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, if extra_args is not a
    tuple or list, or if one of its elements is not a 2-element tuple or
    list.  An existing registration under the same name is replaced.
    """
    if extra_args is None:
        extra_args = []
    # callable() instead of isinstance(..., collections.Callable): that
    # alias is no longer available in the collections module of current
    # Python releases.
    if not callable(function):
        # wrap in a 1-tuple so a tuple argument is formatted, not unpacked
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
535
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
538
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, such as
    "zip", "tar", "bztar" or "gztar".  An unknown format raises ValueError
    before anything else is done.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.  They are passed to every format
    other than "zip".

    'dry_run' and 'logger' are forwarded to the archiving function;
    'verbose' is accepted but not forwarded.
    """
    # Resolve the format first: failing here after the chdir() below would
    # leave the process in root_dir, since the lookup is outside the
    # try/finally that restores the working directory.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # make base_name absolute while the original cwd is still current
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000592
593
def get_unpack_formats():
    """Return the registered unpack formats, sorted.

    Each element of the returned list is a tuple
    (name, extensions, description)
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
604
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` is already claimed by a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # first make sure no other unpacker is registered for this extension
    existing_extensions = {}
    for name, info in _UNPACK_FORMATS.items():
        for ext in info[0]:
            existing_extensions[ext] = name

    for extension in extensions:
        if extension in existing_extensions:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension,
                                       existing_extensions[extension]))

    # callable() instead of isinstance(..., collections.Callable): that
    # alias is no longer available in the collections module of current
    # Python releases.
    if not callable(function):
        raise TypeError('The registered function must be a callable')
621
622
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format.  `extensions` is a list of the
    file name extensions that identify it.

    `function` is the callable used to unpack archives.  It is called
    with the archive to unpack and, if it cannot handle that archive,
    must raise a ReadError exception.

    `extra_args`, if provided, is a sequence of (name, value) tuples
    passed to the callable as keyword arguments.  `description` is
    free text describing the format; get_unpack_formats() returns it.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
644
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
648
649def _ensure_directory(path):
650 """Ensure that the parent directory of `path` exists"""
651 dirname = os.path.dirname(path)
652 if not os.path.isdir(dirname):
653 os.makedirs(dirname)
654
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Members whose name starts with '/' or contains '..' are skipped.
    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            # (deliberately a plain substring test: it also rejects
            # names that hide ".." behind other separators)
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: its parent has been created, nothing
                # to write
                continue

            # file
            data = archive.read(member.filename)
            out = open(target, 'wb')
            try:
                out.write(data)
            finally:
                out.close()
                del data
    finally:
        archive.close()
691
692def _unpack_tarfile(filename, extract_dir):
693 """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
694 """
695 try:
696 tarobj = tarfile.open(filename)
697 except tarfile.TarError:
698 raise ReadError(
699 "%s is not a compressed or uncompressed tar file" % filename)
700 try:
701 tarobj.extractall(extract_dir)
702 finally:
703 tarobj.close()
704
# Registry of unpackers, keyed by format name.  Each value is
# (extensions, function, extra_args, description); extensions are the
# file name suffixes used to guess the format of an archive.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

# bzip2 archives can only be read when the bz2 module is available.
# '.tbz2' is accepted as the short form, mirroring '.tgz' for gztar.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
714
def _find_unpack_format(filename):
    # Return the name of the first registered unpack format one of whose
    # extensions ends `filename`, or None when no format matches.
    for name, info in _UNPACK_FORMATS.items():
        if filename.endswith(tuple(info[0])):
            return name
    return None
721
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    Raises ValueError if `format` is given but not registered, and
    ReadError if `format` is omitted and no registered unpacker matches
    the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is not None:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))
    else:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]

    # registry entries are (extensions, function, extra_args, description):
    # index 1 is the unpacker, index 2 its extra keyword arguments
    func = format_info[1]
    kwargs = dict(format_info[2])
    func(filename, extract_dir, **kwargs)