blob: 74a4db8735198636b9827923d3ccd7a6a671b314 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
18 _BZ2_SUPPORTED = True
19except ImportError:
20 _BZ2_SUPPORTED = False
21
22try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000023 from pwd import getpwnam
24except ImportError:
25 getpwnam = None
26
27try:
28 from grp import getgrnam
29except ImportError:
30 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000031
# Names exported by "from shutil import *".  ignore_patterns() is the helper
# meant to be passed as the `ignore` argument of copytree(), so it is part
# of the public API and is exported together with it.
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "get_unpack_formats", "register_unpack_format",
           "unregister_unpack_format", "unpack_archive", "ignore_patterns"]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000038
class Error(EnvironmentError):
    """Generic failure of one of the file operations of this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000041
class SpecialFileError(EnvironmentError):
    """Signals that an operation (copying, for instance) was attempted on a
    special file (a named pipe, for instance) that does not support it."""
45
class ExecError(EnvironmentError):
    """Signals that an external command could not be executed."""
48
class ReadError(EnvironmentError):
    """Signals that an archive could not be read."""
51
class RegistryError(Exception):
    """Signals that an operation on the archiving or unpacking registries
    failed."""
55
56
# WindowsError is not defined on every platform.  Where the name is missing,
# bind it to None so that code can test "WindowsError is not None" before
# using it in an isinstance() check instead of tripping over a NameError.
try:
    WindowsError
except NameError:
    WindowsError = None
61
def copyfileobj(fsrc, fdst, length=16*1024):
    """Stream the contents of file-like object fsrc into file-like object fdst.

    The data is read with fsrc.read(length) and handed to fdst.write() one
    chunk at a time, until a read returns an empty (false) value.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
69
def _samefile(src, dst):
    """Tell whether the paths src and dst designate the same file."""
    samefile = getattr(os.path, 'samefile', None)
    if samefile is None:
        # No os.path.samefile() on this platform: fall back to comparing
        # the normalized absolute pathnames.
        return (os.path.normcase(os.path.abspath(src)) ==
                os.path.normcase(os.path.abspath(dst)))

    # Macintosh, Unix.
    try:
        return samefile(src, dst)
    except OSError:
        # The comparison itself failed; report "not the same file".
        return False
Tim Peters495ad3c2001-01-15 01:36:40 +000081
def copyfile(src, dst):
    """Copy the contents of the file named src into the file named dst.

    Raises Error when both names designate the same file, and
    SpecialFileError when either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    with open(src, 'rb') as source:
        with open(dst, 'wb') as target:
            copyfileobj(source, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000102
def copymode(src, dst):
    """Apply the permission bits of src to dst.

    Nothing is done on platforms where os.chmod is not available.
    """
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
Guido van Rossumc6360141990-10-13 19:23:40 +0000109
def copystat(src, dst):
    """Transfer the stat information of src (access and modification times,
    mode bits and, where supported, flags) onto dst.

    Each piece is only applied when the platform offers the matching os
    function.
    """
    info = os.stat(src)
    if hasattr(os, 'utime'):
        os.utime(dst, (info.st_atime, info.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, stat.S_IMODE(info.st_mode))
    if not (hasattr(os, 'chflags') and hasattr(info, 'st_flags')):
        return
    try:
        os.chflags(dst, info.st_flags)
    except OSError as why:
        # Only an "operation not supported" failure is tolerated.
        unsupported = getattr(errno, 'EOPNOTSUPP', None)
        if unsupported is None or why.errno != unsupported:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000125
def copy(src, dst):
    """Copy the data and the mode bits of src to dst ("cp src dst").

    When dst is a directory, the copy is created inside it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000136
def copy2(src, dst):
    """Copy the data and all the stat info of src to dst ("cp -p src dst").

    When dst is a directory, the copy is created inside it under the
    base name of src.

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000147
def ignore_patterns(*patterns):
    """Build a callable usable as the `ignore` argument of copytree().

    `patterns` are glob-style patterns.  The returned callable takes
    (path, names) and returns the set of the names that match at least
    one of the patterns."""
    def _ignore_patterns(path, names):
        return set(name
                   for pattern in patterns
                   for name in fnmatch.filter(names, pattern))
    return _ignore_patterns
159
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons;
    each reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files (or directories) pointed to
    by symbolic links are copied.  If the file pointed by the symlink
    doesn't exist, an exception will be added in the list of errors
    raised in an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception.  The flag applies to the whole tree,
    sub-directories included.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            is_link = os.path.islink(srcname)
            if is_link and symlinks:
                os.symlink(os.readlink(srcname), dstname)
            elif (is_link and ignore_dangling_symlinks
                  and not os.path.exists(srcname)):
                # os.path.exists() follows the link, so it is false exactly
                # when the link is dangling -- whatever the current working
                # directory is.  The caller asked to skip those silently.
                continue
            elif os.path.isdir(srcname):
                # A real directory, or a link to one whose content has to
                # be copied.  Every option is handed down so that the whole
                # tree is copied under the same rules.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types,
                # and an error for a dangling link that was not ignored.
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        # Copying file access times may fail on Windows; that failure is
        # ignored.  Anything else is reported as one more (src, dst, msg)
        # entry, like the per-file errors above.
        if WindowsError is None or not isinstance(why, WindowsError):
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000241
def rmtree(path, ignore_errors=False, onerror=None):
    """Delete the directory tree rooted at path.

    With ignore_errors set, failures are silently skipped.  Otherwise, when
    onerror is given, it is called as onerror(func, path, exc_info) where
    func is the function that failed (os.listdir, os.remove or os.rmdir --
    or os.path.islink when path turns out to be a symbolic link), path is
    the argument it failed on and exc_info is the tuple returned by
    sys.exc_info().  With neither of them, the exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # re-raises the exception currently being handled by the caller
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # cannot be examined: handle it like a plain file
            is_directory = False
        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000289
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000290
def _basename(path):
    """Return the last component of path, directories included.

    Unlike os.path.basename(), trailing separators are stripped first, so
    a path written as "a/b/" yields "b" rather than an empty string.
    """
    trimmed = path.rstrip(os.path.sep)
    return os.path.basename(trimmed)
295
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If dst is an existing directory (or a symlink to one), src is moved
    inside it under its own base name; an Error is raised when that path
    already exists.

    If dst exists but is not a directory, it may be overwritten depending
    on os.rename() semantics.

    os.rename() is tried first.  When it fails with OSError, src is copied
    to the destination and then removed; moving a directory into itself
    raises Error.
    A lot more could be done here... A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)
    try:
        os.rename(src, target)
    except OSError:
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
            return
        if _destinsrc(src, dst):
            raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        copytree(src, target, symlinks=True)
        rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000329
def _destinsrc(src, dst):
    """Tell whether dst is src itself or a path located below src.

    Both paths are made absolute and terminated with a separator before
    the prefix test, so "/a/bc" is not taken to be inside "/a/b".
    """
    separator = os.path.sep

    def _terminated(path):
        path = abspath(path)
        if path.endswith(separator):
            return path
        return path + separator

    return _terminated(dst).startswith(_terminated(src))
Tarek Ziadé396fad72010-02-23 05:30:31 +0000338
def _get_gid(name):
    """Return the gid of the group called name.

    None is returned when no name is given, when getgrnam() is not
    available, or when the group is unknown.
    """
    if name is None or getgrnam is None:
        return None
    try:
        entry = getgrnam(name)
    except KeyError:
        return None
    if entry is None:
        return None
    return entry[2]
350
def _get_uid(name):
    """Return the uid of the user called name.

    None is returned when no name is given, when getpwnam() is not
    available, or when the user is unknown.
    """
    if name is None or getpwnam is None:
        return None
    try:
        entry = getpwnam(name)
    except KeyError:
        return None
    if entry is None:
        return None
    return entry[2]
362
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module could be imported), or None.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'logger', when given, receives informational messages.  With 'dry_run'
    set, nothing is written to disk.  'verbose' is accepted but not used.

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is empty when base_name has no directory part: the archive
    # then goes into the current directory and there is nothing to create.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tar.add() filter: override the ownership recorded for each member
        # with the requested owner/group, when they could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
423
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create zip_filename from base_dir by spawning an external "zip" command.

    The command is run recursively, quietly unless verbose is true, and
    dry_run is passed on to the spawn helper.  Raises ExecError when the
    command cannot be executed successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message has to be formatted *before* the exception is built:
        # applying % to the exception object itself is a TypeError.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
440
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip".  Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path).  If neither tool is
    available, raises ExecError.

    'logger', when given, receives informational messages.  With 'dry_run'
    set, nothing is written to disk.

    Returns the name of the output zip file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    # archive_dir is empty when base_name has no directory part: the archive
    # then goes into the current directory and there is nothing to create.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
        return zip_filename

    if logger is not None:
        logger.info("creating '%s' and adding '%s' to it",
                    zip_filename, base_dir)

    if dry_run:
        return zip_filename

    archive = zipfile.ZipFile(zip_filename, "w",
                              compression=zipfile.ZIP_DEFLATED)
    try:
        for dirpath, dirnames, filenames in os.walk(base_dir):
            for name in filenames:
                path = os.path.normpath(os.path.join(dirpath, name))
                if os.path.isfile(path):
                    archive.write(path, path)
                    if logger is not None:
                        logger.info("adding '%s'", path)
    finally:
        # close the archive even when adding a file failed
        archive.close()

    return zip_filename
487
# Registry of the archive formats: name -> (function, extra_args, description)
# where extra_args is a sequence of (argument name, value) pairs passed to
# the function as keyword arguments.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }

# bzip2 compression is only advertised when the bz2 module is importable;
# _make_tarball() rejects 'bzip2' otherwise.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")
498
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
508
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError when function is not callable, or when extra_args is
    not a tuple or list of two-element tuples or lists.
    """
    if extra_args is None:
        extra_args = []
    # callable() instead of collections.Callable: the latter is no longer
    # available in the collections module of recent Python versions.
    if not callable(function):
        # one-element tuple, so that a tuple argument is formatted as a whole
        raise TypeError('The %s object is not callable' % (function,))
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
529
def unregister_archive_format(name):
    """Remove the archive format called name from the registry.

    Raises KeyError when no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
532
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, such as
    "zip", "tar", "bztar" or "gztar".  An unknown format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to the archiving function of every
    format but "zip".  By default, uses the current owner and group.

    'logger', when given, receives debugging messages.  With 'dry_run' set,
    the current directory is left alone and the flag is passed on to the
    archiving function.  'verbose' is accepted but not used here.
    """
    # Validate the format first: nothing must have been changed (in
    # particular not the current directory) when it turns out to be unknown.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # make base_name absolute before leaving the current directory
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000586
587
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the list is a tuple
    (name, extensions, description)
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
598
def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker.

    Raises RegistryError when one of the extensions already belongs to a
    registered unpack format, and TypeError when function is not callable.
    extra_args is accepted but not checked.
    """
    # first make sure no other unpacker is registered for this extension
    existing_extensions = {}
    for name, info in _UNPACK_FORMATS.items():
        for ext in info[0]:
            existing_extensions[ext] = name

    for extension in extensions:
        if extension in existing_extensions:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension,
                                       existing_extensions[extension]))

    # callable() instead of collections.Callable: the latter is no longer
    # available in the collections module of recent Python versions.
    if not callable(function):
        raise TypeError('The registered function must be a callable')
615
616
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Add an unpack format to the registry.

    `name` is the name of the format and `extensions` the list of the
    filename extensions that identify it.

    `function` is the callable used to unpack the archives of that format.
    It receives the archive to unpack, and has to raise a ReadError
    exception when it is unable to handle it.

    `extra_args`, if provided, is a sequence of (name, value) tuples that
    will be passed as arguments to the callable.  `description` can be
    provided to describe the format, and will be returned by the
    get_unpack_formats() function.
    """
    arguments = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, arguments)
    _UNPACK_FORMATS[name] = (extensions, function, arguments, description)
638
def unregister_unpack_format(name):
    """Remove the unpack format called name from the registry.

    Raises KeyError when no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
642
def _ensure_directory(path):
    """Create the parent directory of `path`, unless it is already there."""
    parent = os.path.dirname(path)
    if os.path.isdir(parent):
        return
    os.makedirs(parent)
648
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` into `extract_dir`.

    Members whose name is absolute or contains ".." are skipped.  Raises
    ReadError when zipfile cannot be imported or when `filename` is not a
    zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: creating it was all there is to do
                continue

            # file
            with open(target, 'wb') as output:
                output.write(archive.read(member.filename))
    finally:
        archive.close()
685
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` (plain, or compressed in a way
    tarfile.open() recognizes) into `extract_dir`.

    Raises ReadError when tarfile cannot open `filename` as a tar archive.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        message = "%s is not a compressed or uncompressed tar file" % filename
        raise ReadError(message)
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
698
# Registry of the unpack formats:
# name -> (extensions, function, extra_args, description)
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

# Only bzip2'ed *tar* files can be unpacked by _unpack_tarfile(), hence the
# tar-specific extensions rather than a bare ".bz2", which would also claim
# any plain bzip2-compressed file.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
708
def _find_unpack_format(filename):
    """Return the name of a registered unpack format one of whose extensions
    ends `filename`, or None when there is none."""
    for name, info in _UNPACK_FORMATS.items():
        if any(filename.endswith(extension) for extension in info[0]):
            return name
    return None
715
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    A ValueError is raised when `format` is given but not registered, and
    a ReadError when no unpacker is registered for the extension of
    `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # A registry entry is (extensions, function, extra_args, description):
    # the unpacker is item 1 and its extra keyword arguments are item 2.
    func = format_info[1]
    kwargs = dict(format_info[2])
    func(filename, extract_dir, **kwargs)