blob: 95bebb874ab30fac667b0862b25b6cfc38c900c7 [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
20except ImportError:
21 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
25except ImportError:
26 getpwnam = None
27
28try:
29 from grp import getgrnam
30except ImportError:
31 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Public API of this module, grouped by purpose.
__all__ = [
    # copying
    "copyfileobj",
    "copyfile",
    "copymode",
    "copystat",
    "copy",
    "copy2",
    "copytree",
    # moving / removing
    "move",
    "rmtree",
    # exceptions
    "Error",
    "SpecialFileError",
    "ExecError",
    # archive creation
    "make_archive",
    "get_archive_formats",
    "register_archive_format",
    "unregister_archive_format",
    # archive extraction
    "get_unpack_formats",
    "register_unpack_format",
    "unregister_unpack_format",
    "unpack_archive",
    # misc helpers
    "ignore_patterns",
    "chown",
]
Éric Araujoe4d5b8e2011-08-08 16:51:11 +020040 # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(EnvironmentError):
    """Generic error raised by the high-level file operations in this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
48
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
51
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read."""
54
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
58
59
# WindowsError only exists as a builtin on Windows.  Bind the name to None
# elsewhere so later code can test "WindowsError is not None" safely.
try:
    WindowsError
except NameError:
    WindowsError = None
64
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is transferred in chunks of at most `length` units, read from the
    current position of fsrc until read() returns an empty result.
    """
    while True:
        chunk = fsrc.read(length)
        if not chunk:
            # An empty read means end of data.
            return
        fdst.write(chunk)
72
Johannes Gijsbers46f14592004-08-14 13:30:02 +000073def _samefile(src, dst):
74 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000075 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000076 try:
77 return os.path.samefile(src, dst)
78 except OSError:
79 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000080
81 # All other platforms: check for same pathname.
82 return (os.path.normcase(os.path.abspath(src)) ==
83 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000084
def copyfile(src, dst, symlinks=False):
    """Copy the data of the file `src` to the file `dst`.

    When `symlinks` is true and `src` is a symbolic link, `dst` is created
    as a new symlink with the same target rather than as a copy of the file
    the link points to.

    Raises Error if `src` and `dst` are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for candidate in (src, dst):
        try:
            info = os.stat(candidate)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % candidate)

    if symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
Guido van Rossumc6360141990-10-13 19:23:40 +0000112
def copymode(src, dst, symlinks=False):
    """Copy the permission bits of `src` onto `dst`.

    When `symlinks` is true and both `src` and `dst` are symbolic links, the
    links themselves are used instead of their targets.  This requires
    os.lchmod; where it is missing (eg. Linux) nothing is done in that case.
    Likewise nothing is done if os.chmod is missing in the regular case.
    """
    operate_on_links = (symlinks and os.path.islink(src)
                        and os.path.islink(dst))
    if operate_on_links:
        read_stat, chmod_name = os.lstat, 'lchmod'
    else:
        read_stat, chmod_name = os.stat, 'chmod'

    if not hasattr(os, chmod_name):
        # The platform cannot change the mode in this situation.
        return

    apply_mode = getattr(os, chmod_name)
    source_stat = read_stat(src)
    apply_mode(dst, stat.S_IMODE(source_stat.st_mode))
133
def copystat(src, dst, symlinks=False):
    """Copy mode bits, access/modification times and flags from src to dst.

    When `symlinks` is true and both `src` and `dst` are symbolic links, the
    links themselves are used instead of their targets.  Any operation the
    platform does not provide is silently skipped.
    """
    def _nop(*args):
        pass

    def _os_function(name):
        # Fall back to a no-op when the platform lacks the function.
        return getattr(os, name, _nop)

    if symlinks and os.path.islink(src) and os.path.islink(dst):
        stat_func = os.lstat
        utime_func = _os_function('lutimes')
        chmod_func = _os_function('lchmod')
        chflags_func = _os_function('lchflags')
    else:
        stat_func = os.stat
        utime_func = _os_function('utime')
        chmod_func = _os_function('chmod')
        chflags_func = _os_function('chflags')

    st = stat_func(src)
    utime_func(dst, (st.st_atime, st.st_mtime))
    chmod_func(dst, stat.S_IMODE(st.st_mode))

    if not hasattr(st, 'st_flags'):
        return
    try:
        chflags_func(dst, st.st_flags)
    except OSError as why:
        # Only an "operation not supported" failure is tolerated.
        unsupported = getattr(errno, 'EOPNOTSUPP', None)
        if unsupported is None or why.errno != unsupported:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000166
def copy(src, dst, symlinks=False):
    """Copy data and mode bits ("cp src dst").

    If `dst` is a directory, the file is copied into it under the base name
    of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, symlinks=symlinks)
    copymode(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000180
def copy2(src, dst, symlinks=False):
    """Copy data and all stat info ("cp -p src dst").

    If `dst` is a directory, the file is copied into it under the base name
    of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    if os.path.isdir(dst):
        target = os.path.join(dst, os.path.basename(src))
    else:
        target = dst
    copyfile(src, target, symlinks=symlinks)
    copystat(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000194
def ignore_patterns(*patterns):
    """Build a callable usable as the `ignore` parameter of copytree().

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any of them.
    """
    def _ignore_patterns(path, names):
        return {matched
                for pattern in patterns
                for matched in fnmatch.filter(names, pattern)}
    return _ignore_patterns
206
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.  The flag is applied to the
    whole tree, including sub-directories.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, symlinks=symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.  The check
                    # follows the link itself so that a relative target is
                    # resolved against the link's directory, not the current
                    # working directory.
                    if ignore_dangling_symlinks and not os.path.exists(srcname):
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate every option, including ignore_dangling_symlinks,
                # to the sub-tree.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files (must precede EnvironmentError, which
        # is a base class of Error)
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # One entry per failure: append the tuple as a single item
            # (extend() would flatten it into three separate strings).
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000292
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        onerror = lambda *args: None
    elif onerror is None:
        def onerror(*args):
            # Always invoked from inside an except block, so a bare raise
            # re-raises the exception currently being handled.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            child_is_dir = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # Cannot stat it: treat it as a plain file and try to remove it.
            child_is_dir = False

        if child_is_dir:
            rmtree(child, ignore_errors, onerror)
            continue

        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000340
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000341
Christian Heimesada8c3b2008-03-18 18:26:33 +0000342def _basename(path):
343 # A basename() variant which first strips the trailing slash, if present.
344 # Thus we always get the last component of the path, even for directories.
345 return os.path.basename(path.rstrip(os.path.sep))
346
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist, otherwise Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.  A
    symbolic link is re-created at the destination (pointing to the same
    target) rather than having its target copied.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        # rename() failed (typically a cross-filesystem move): fall back to
        # copy-then-delete.
        if os.path.islink(src):
            # Must be checked before isdir(): a symlink to a directory would
            # otherwise be deep-copied and rmtree() refuses symlinks.
            linkto = os.readlink(src)
            os.symlink(linkto, real_dst)
            os.unlink(src)
        elif os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000386
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000387def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000388 src = abspath(src)
389 dst = abspath(dst)
390 if not src.endswith(os.path.sep):
391 src += os.path.sep
392 if not dst.endswith(os.path.sep):
393 dst += os.path.sep
394 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000395
396def _get_gid(name):
397 """Returns a gid, given a group name."""
398 if getgrnam is None or name is None:
399 return None
400 try:
401 result = getgrnam(name)
402 except KeyError:
403 result = None
404 if result is not None:
405 return result[2]
406 return None
407
408def _get_uid(name):
409 """Returns an uid, given a user name."""
410 if getpwnam is None or name is None:
411 return None
412 try:
413 result = getpwnam(name)
414 except KeyError:
415 result = None
416 if result is not None:
417 return result[2]
418 return None
419
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when the bz2
    module is available), or None.  Any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    'verbose' is accepted for signature compatibility and is not used.
    If 'dry_run' is true, nothing is written to disk.  'logger', when given,
    receives info() messages about the actions taken.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # Reject unknown (or unavailable) compression methods up front.
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory component; in that
    # case the archive goes to the current directory and there is nothing
    # to create (os.makedirs('') would fail).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
481
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` with the external "zip" command.

    The command is run recursively ("-r"), quietly unless `verbose` is true.
    `dry_run` is forwarded to the spawn helper.

    Raises ExecError if the command could not be run successfully.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* the exception is built;
        # applying % to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
498
499def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
500 """Create a zip file from all the files under 'base_dir'.
501
Éric Araujo4433a5f2010-12-15 20:26:30 +0000502 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000503 "zipfile" Python module (if available) or the InfoZIP "zip" utility
504 (if installed and found on the default search path). If neither tool is
505 available, raises ExecError. Returns the name of the output zip
506 file.
507 """
508 zip_filename = base_name + ".zip"
509 archive_dir = os.path.dirname(base_name)
510
511 if not os.path.exists(archive_dir):
512 if logger is not None:
513 logger.info("creating %s", archive_dir)
514 if not dry_run:
515 os.makedirs(archive_dir)
516
517 # If zipfile module is not available, try spawning an external 'zip'
518 # command.
519 try:
520 import zipfile
521 except ImportError:
522 zipfile = None
523
524 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000525 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000526 else:
527 if logger is not None:
528 logger.info("creating '%s' and adding '%s' to it",
529 zip_filename, base_dir)
530
531 if not dry_run:
532 zip = zipfile.ZipFile(zip_filename, "w",
533 compression=zipfile.ZIP_DEFLATED)
534
535 for dirpath, dirnames, filenames in os.walk(base_dir):
536 for name in filenames:
537 path = os.path.normpath(os.path.join(dirpath, name))
538 if os.path.isfile(path):
539 zip.write(path, path)
540 if logger is not None:
541 logger.info("adding '%s'", path)
542 zip.close()
543
544 return zip_filename
545
# Registry of archive formats: name -> (function, extra_args, description).
# 'function' is called as function(base_name, base_dir, **kwargs) and
# 'extra_args' is a sequence of (name, value) pairs added to the kwargs.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
    }

# 'bztar' is only offered when the bz2 module can be imported; listing it
# unconditionally would advertise a format that cannot be produced.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")
556
def get_archive_formats():
    """Return the archive formats currently registered for archiving.

    The result is a sorted list of (name, description) tuples.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
566
def register_archive_format(name, function, extra_args=None, description=''):
    """Register a new archive format, replacing any existing one of that name.

    `name` is the name of the format and `function` the callable used to
    create archives.  `extra_args`, if provided, is a list or tuple of
    (name, value) pairs passed to the callable as additional arguments.
    `description` is returned by get_archive_formats().

    Raises TypeError if `function` is not callable or `extra_args` is not
    well formed.
    """
    extra_args = [] if extra_args is None else extra_args

    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')

    for pair in extra_args:
        is_pair = isinstance(pair, (tuple, list)) and len(pair) == 2
        if not is_pair:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
587
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _ARCHIVE_FORMATS.pop(name)
590
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format, such as
    "zip", "tar", "bztar" or "gztar".  An unknown format raises ValueError
    before anything else is done.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'verbose' is accepted for signature compatibility and is not forwarded.
    'dry_run' and 'logger' are forwarded to the format's function.
    """
    # Validate the format first: failing after os.chdir() below would leave
    # the process in 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Resolve base_name before chdir so it stays relative to the
        # caller's working directory.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000644
645
def get_unpack_formats():
    """Return the formats currently registered for unpacking.

    The result is a sorted list of (name, extensions, description) tuples.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
656
def _check_unpack_options(extensions, function, extra_args):
    """Validate the arguments of an unpacker registration.

    Raises RegistryError if one of `extensions` is already claimed by a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # Map every extension already registered to the format that owns it.
    claimed = {ext: format_name
               for format_name, info in _UNPACK_FORMATS.items()
               for ext in info[0]}

    for extension in extensions:
        if extension not in claimed:
            continue
        msg = '%s is already registered for "%s"'
        raise RegistryError(msg % (extension, claimed[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
673
674
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format and `extensions` the list of file
    extensions that identify it.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise ReadError if it cannot handle it.

    `extra_args`, if provided, is a sequence of (name, value) tuples passed
    to the callable as additional arguments.  `description` is returned by
    get_unpack_formats().
    """
    unpack_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, unpack_args)
    _UNPACK_FORMATS[name] = (extensions, function, unpack_args, description)
696
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    _UNPACK_FORMATS.pop(name)
700
701def _ensure_directory(path):
702 """Ensure that the parent directory of `path` exists"""
703 dirname = os.path.dirname(path)
704 if not os.path.isdir(dirname):
705 os.makedirs(dirname)
706
def _unpack_zipfile(filename, extract_dir):
    """Unpack the zip archive `filename` into `extract_dir`.

    Members whose name is absolute or contains ".." are skipped.
    Raises ReadError if `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            target = os.path.join(extract_dir, *member_name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if member_name.endswith('/'):
                # directory entry: creating its path is all that is needed
                continue

            # Read first, so nothing is created on disk if reading fails.
            payload = archive.read(member.filename)
            with open(target, 'wb') as out:
                out.write(payload)
            del payload
743
def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`.

    Raises ReadError if `filename` cannot be opened as a tar archive.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    # The context manager closes the archive whether or not extraction
    # succeeds.
    with archive:
        archive.extractall(extract_dir)
756
# Registry of unpack formats, keyed by format name. Each value is a
# (extensions, function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

if _BZ2_SUPPORTED:
    # Match only tar archives compressed with bzip2; a bare '.bz2' suffix
    # would also claim plain bzip2 files that the tar unpacker cannot read.
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
766
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None if there is no such format.
    """
    for format_name, format_info in _UNPACK_FORMATS.items():
        # format_info[0] holds the extensions registered for the format
        if any(filename.endswith(suffix) for suffix in format_info[0]):
            return format_name
    return None
773
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    A ValueError is raised if `format` is given but is not a registered
    format. A ReadError is raised if `format` is omitted and no unpacker
    is registered for the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # format_info is (extensions, function, extra_args, description);
    # extra_args is turned into keyword arguments for the unpacker.
    unpacker = format_info[1]
    unpacker(filename, extract_dir, **dict(format_info[2]))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200809
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200810
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        vfs = os.statvfs(path)
        # All block counts are scaled by the same fragment size.
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        Returned value is a named tuple with attributes 'total', 'used' and
        'free', which are the amount of total, used and free space, in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total=total, used=total - free, free=free)
Sandro Tosid902a142011-08-22 23:28:27 +0200843
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200844
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    `user` and `group` can each be a uid/gid or a user/group name; names
    are converted to their respective uid/gid. At least one of them must be
    given; the one left as None is not changed.

    Raises ValueError if both are None, and LookupError if a name cannot be
    resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 means don't change it
    if user is None:
        uid = -1
    elif isinstance(user, str):
        # a string is a system username that has to be resolved
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))
    else:
        # anything else is handed to os.chown() unchanged
        uid = user

    if group is None:
        gid = -1
    elif isinstance(group, int):
        gid = group
    else:
        # every non-int group is resolved as a group name
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)