blob: 5f69fb7b75d6d6d584a3218960d76f9ef8ce2d3a [file] [log] [blame]
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
# Probe for optional modules.  Each probe leaves a module-level name behind
# that the rest of the file tests instead of importing again.

# bz2: only its availability matters here, so the module itself is dropped.
try:
    import bz2
except ImportError:
    _BZ2_SUPPORTED = False
else:
    del bz2
    _BZ2_SUPPORTED = True

# pwd: user-name lookup, None when the module cannot be imported.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

# grp: group-name lookup, None when the module cannot be imported.
try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Public API of this module, in the original order.
__all__ = [
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    "move",
    "rmtree",
    "Error",
    "SpecialFileError",
    "ExecError",
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    "get_unpack_formats",
    "register_unpack_format",
    "unregister_unpack_format",
    "unpack_archive",
    "ignore_patterns",
    "chown",
]
    # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(EnvironmentError):
    """Raised by the copy and move helpers of this module when an operation
    cannot be carried out (e.g. source and destination are the same file)."""
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file, such as a named pipe, for which it is not supported."""
48
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
51
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read by an unpacker."""
54
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
58
59
# The WindowsError builtin is not defined everywhere: looking it up raises
# NameError where it is missing.  Bind the name to None in that case so that
# later code can guard its isinstance() checks with
# ``WindowsError is not None``.
try:
    WindowsError
except NameError:
    WindowsError = None
64
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` (as passed to
    fsrc.read()); copying stops at the first empty read.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
72
Johannes Gijsbers46f14592004-08-14 13:30:02 +000073def _samefile(src, dst):
74 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000075 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000076 try:
77 return os.path.samefile(src, dst)
78 except OSError:
79 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000080
81 # All other platforms: check for same pathname.
82 return (os.path.normcase(os.path.abspath(src)) ==
83 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000084
def copyfile(src, dst, symlinks=False):
    """Copy the data of file `src` to file `dst`.

    When `symlinks` is true and `src` is a symbolic link, `dst` is created as
    a new symlink with the same target instead of receiving a copy of the
    file the link points to.

    Raises Error if `src` and `dst` are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
Guido van Rossumc6360141990-10-13 19:23:40 +0000112
def copymode(src, dst, symlinks=False):
    """Copy the permission bits of `src` onto `dst`.

    With `symlinks` set, and only when both `src` and `dst` are symbolic
    links, the links themselves are used (os.lstat / os.lchmod) rather than
    their targets.  If the required chmod variant is missing from `os`
    (`lchmod` in the symlink case, `chmod` otherwise) nothing is done.
    """
    operate_on_links = (symlinks and os.path.islink(src)
                        and os.path.islink(dst))
    if operate_on_links:
        read_stat = os.lstat
        apply_mode = getattr(os, 'lchmod', None)
    else:
        read_stat = os.stat
        apply_mode = getattr(os, 'chmod', None)

    if apply_mode is None:
        # The platform offers no way to set the mode: silently do nothing.
        return

    source_stat = read_stat(src)
    apply_mode(dst, stat.S_IMODE(source_stat.st_mode))
133
def copystat(src, dst, symlinks=False):
    """Copy stat information (mode bits, atime, mtime, flags) from src to dst.

    With `symlinks` set, and only when both `src` and `dst` are symbolic
    links, the links themselves are inspected and updated instead of their
    targets.  Any of the underlying `os` functions that does not exist on the
    platform is replaced by a no-op.
    """
    def _skip(*args):
        pass

    if symlinks and os.path.islink(src) and os.path.islink(dst):
        read_stat = os.lstat
        set_times = getattr(os, 'lutimes', _skip)
        set_mode = getattr(os, 'lchmod', _skip)
        set_flags = getattr(os, 'lchflags', _skip)
    else:
        read_stat = os.stat
        set_times = getattr(os, 'utime', _skip)
        set_mode = getattr(os, 'chmod', _skip)
        set_flags = getattr(os, 'chflags', _skip)

    info = read_stat(src)
    permissions = stat.S_IMODE(info.st_mode)
    set_times(dst, (info.st_atime, info.st_mtime))
    set_mode(dst, permissions)
    if not hasattr(info, 'st_flags'):
        return
    try:
        set_flags(dst, info.st_flags)
    except OSError as why:
        # Only "operation not supported" is tolerated; anything else (or a
        # platform without that errno code) propagates.
        unsupported = getattr(errno, 'EOPNOTSUPP', None)
        if unsupported is None or why.errno != unsupported:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000166
def copy(src, dst, symlinks=False):
    """Copy data and mode bits ("cp src dst").

    `dst` may be a directory, in which case the file is copied into it under
    the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, symlinks=symlinks)
    copymode(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000180
def copy2(src, dst, symlinks=False):
    """Copy data and all stat info ("cp -p src dst").

    `dst` may be a directory, in which case the file is copied into it under
    the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, symlinks=symlinks)
    copystat(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000194
def ignore_patterns(*patterns):
    """Build a callable suitable as the `ignore` argument of copytree().

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of `names` matching any of them.
    """
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns
206
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, symlinks=symlinks)
                else:
                    # ignore dangling symlink if the flag is on.  The check
                    # goes through the link itself (os.path.exists follows
                    # symlinks) so that a relative target is resolved against
                    # the link's directory, not the current directory.
                    if (not os.path.exists(srcname)
                            and ignore_dangling_symlinks):
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate every option, including ignore_dangling_symlinks,
                # to the sub-directories.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One (src, dst, message) entry, like every other error above.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000292
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir
    (or os.path.islink when `path` is a symbolic link); path is the
    argument to that function that caused it to fail; and exc_info is a
    tuple returned by sys.exc_info().  If ignore_errors is false and
    onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an ``except`` clause below, so a
            # bare raise re-raises the error being handled.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
        entries = []

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            child_mode = os.lstat(child).st_mode
        except os.error:
            child_mode = 0
        if stat.S_ISDIR(child_mode):
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000340
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000341
Christian Heimesada8c3b2008-03-18 18:26:33 +0000342def _basename(path):
343 # A basename() variant which first strips the trailing slash, if present.
344 # Thus we always get the last component of the path, even for directories.
345 return os.path.basename(path.rstrip(os.path.sep))
346
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # rename() failed: fall back to re-creating `src` at the target and
        # removing the original afterwards.
        if os.path.islink(src):
            link_target = os.readlink(src)
            os.symlink(link_target, target)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, target, symlinks=True)
            rmtree(src)
        else:
            copy2(src, target)
            os.unlink(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000393
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000394def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000395 src = abspath(src)
396 dst = abspath(dst)
397 if not src.endswith(os.path.sep):
398 src += os.path.sep
399 if not dst.endswith(os.path.sep):
400 dst += os.path.sep
401 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000402
403def _get_gid(name):
404 """Returns a gid, given a group name."""
405 if getgrnam is None or name is None:
406 return None
407 try:
408 result = getgrnam(name)
409 except KeyError:
410 result = None
411 if result is not None:
412 return result[2]
413 return None
414
415def _get_uid(name):
416 """Returns an uid, given a user name."""
417 if getpwnam is None or name is None:
418 return None
419 try:
420 result = getpwnam(name)
421 except KeyError:
422 result = None
423 if result is not None:
424 return result[2]
425 return None
426
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None; "bzip2" is
    only accepted when bz2 support was detected.  Any other value raises
    ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'verbose' is accepted for interface compatibility and is not used.  With
    'dry_run' set, nothing is written to disk.  'logger', if given, receives
    progress messages through its info() method.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # reject compression names we do not know (or cannot support here)
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty archive_dir means "current directory": nothing to create, and
    # os.makedirs('') would fail.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that resolved
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
488
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` by spawning an external "zip".

    The command is run recursively ("-r"), and quietly ("-rq") unless
    `verbose` is true; `dry_run` is forwarded to distutils' spawn().

    Raises ExecError if spawning the command fails.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is handed to ExecError;
        # applying % to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
505
506def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
507 """Create a zip file from all the files under 'base_dir'.
508
Éric Araujo4433a5f2010-12-15 20:26:30 +0000509 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000510 "zipfile" Python module (if available) or the InfoZIP "zip" utility
511 (if installed and found on the default search path). If neither tool is
512 available, raises ExecError. Returns the name of the output zip
513 file.
514 """
515 zip_filename = base_name + ".zip"
516 archive_dir = os.path.dirname(base_name)
517
518 if not os.path.exists(archive_dir):
519 if logger is not None:
520 logger.info("creating %s", archive_dir)
521 if not dry_run:
522 os.makedirs(archive_dir)
523
524 # If zipfile module is not available, try spawning an external 'zip'
525 # command.
526 try:
527 import zipfile
528 except ImportError:
529 zipfile = None
530
531 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000532 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000533 else:
534 if logger is not None:
535 logger.info("creating '%s' and adding '%s' to it",
536 zip_filename, base_dir)
537
538 if not dry_run:
539 zip = zipfile.ZipFile(zip_filename, "w",
540 compression=zipfile.ZIP_DEFLATED)
541
542 for dirpath, dirnames, filenames in os.walk(base_dir):
543 for name in filenames:
544 path = os.path.normpath(os.path.join(dirpath, name))
545 if os.path.isfile(path):
546 zip.write(path, path)
547 if logger is not None:
548 logger.info("adding '%s'", path)
549 zip.close()
550
551 return zip_filename
552
# Registry of archive formats: name -> (function, extra_args, description).
# extra_args is a list of (keyword, value) pairs passed to the function.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file")
    }

# 'bztar' is only advertised when bz2 support was detected; _make_tarball
# rejects compress='bzip2' otherwise.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")
563
def get_archive_formats():
    """Return the archive formats currently registered.

    The result is a sorted list of (name, description) tuples.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
573
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format.

    `name` is the name of the format and `function` the callable used to
    create archives.  `extra_args`, if provided, is a sequence (list or
    tuple) of (name, value) pairs passed as arguments to the callable.
    `description` is what get_archive_formats() reports for the format.

    Raises TypeError if `function` is not callable or `extra_args` does not
    have the expected shape.
    """
    extra_args = [] if extra_args is None else extra_args

    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')

    def _is_pair(item):
        return isinstance(item, (tuple, list)) and len(item) == 2

    if not all(_is_pair(item) for item in extra_args):
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
594
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
597
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, e.g.
    "zip", "tar", "bztar" or "gztar".  An unregistered format raises
    ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'verbose' is accepted but not used here.  With 'dry_run' set the working
    directory is not changed; the flag is also forwarded to the archiver.
    'logger', if given, receives debug messages about directory changes and
    is forwarded to the archiver.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep referring to the same
        # place once the working directory has changed.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000651
652
def get_unpack_formats():
    """Return the unpack formats currently registered.

    The result is a sorted list of (name, extensions, description) tuples.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
663
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # first make sure no other unpacker is registered for this extension
    owners = {}
    for format_name, info in _UNPACK_FORMATS.items():
        owners.update(dict.fromkeys(info[0], format_name))

    for extension in extensions:
        if extension in owners:
            raise RegistryError('%s is already registered for "%s"'
                                % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
680
681
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be used to unpack archives. The
    callable will receive archives to unpack. If it's unable to handle an
    archive, it needs to raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as arguments to the callable.  `description` can be
    provided to describe the format, and will be returned by the
    get_unpack_formats() function.
    """
    extra_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = (extensions, function, extra_args, description)
703
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
707
708def _ensure_directory(path):
709 """Ensure that the parent directory of `path` exists"""
710 dirname = os.path.dirname(path)
711 if not os.path.isdir(dirname):
712 os.makedirs(dirname)
713
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name is absolute or contains '..' are skipped instead
    of being extracted.  Parent directories of every extracted member are
    created as needed.

    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            name = info.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: ensuring it exists was all there is to do
                continue

            # Stream the member to disk rather than reading it whole into
            # memory, so large members do not blow up memory usage.
            with archive.open(info) as source, open(target, 'wb') as dest:
                copyfileobj(source, dest)
750
751def _unpack_tarfile(filename, extract_dir):
752 """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
753 """
754 try:
755 tarobj = tarfile.open(filename)
756 except tarfile.TarError:
757 raise ReadError(
758 "%s is not a compressed or uncompressed tar file" % filename)
759 try:
760 tarobj.extractall(extract_dir)
761 finally:
762 tarobj.close()
763
# Registry of the known unpack formats.  Each entry maps a format name to
# (extensions, unpack function, extra (name, value) arguments, description).
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

if _BZ2_SUPPORTED:
    # '.tbz2' is the short form matching '.tgz' for gzip; the bare '.bz2'
    # extension is kept so file names that matched before still match.
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2', '.bz2'],
                                _unpack_tarfile, [], "bzip2'ed tar-file")

773
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format having an
    extension that `filename` ends with, or None if there is no match.
    """
    for fmt_name, fmt_info in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in fmt_info[0]):
            return fmt_name
    return None
780
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar" (and
    "bztar" when the bz2 module is available), or any other registered
    format.  If not provided, unpack_archive will use the filename
    extension and see if an unpacker was registered for that extension.

    Raises ValueError if `format` is given but is not a registered format,
    and ReadError if `format` is not given and no registered extension
    matches `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # registry entries are (extensions, function, extra_args, description);
    # extra_args holds (name, value) pairs passed on as keyword arguments
    func = format_info[1]
    kwargs = dict(format_info[2])
    func(filename, extract_dir, **kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200816
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200817
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', which are the amount of total, used and free space, in
        bytes.
        """
        vfs = os.statvfs(path)
        block_size = vfs.f_frsize
        # 'used' is derived from f_bfree while 'free' reports f_bavail
        return _ntuple_diskusage(
            total=vfs.f_blocks * block_size,
            used=(vfs.f_blocks - vfs.f_bfree) * block_size,
            free=vfs.f_bavail * block_size)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', which are the amount of total, used and free space, in
        bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200850
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200851
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that
    case, they are converted to their respective uid/gid.  An argument
    left as None leaves the corresponding owner unchanged.

    Raises ValueError if both user and group are None, and LookupError if
    a user or group name cannot be resolved.
    """

    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    _user = user
    _group = group

    # -1 means don't change it
    if user is None:
        _user = -1
    # user can either be an int (the uid) or a string (the system username)
    elif isinstance(user, str):
        _user = _get_uid(user)
        if _user is None:
            raise LookupError("no such user: {!r}".format(user))

    if group is None:
        _group = -1
    # same rule as for user: only names are looked up, anything else is
    # handed to os.chown() as the gid
    elif isinstance(group, str):
        _group = _get_gid(group)
        if _group is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, _user, _group)