blob: 6664599ecc8ee6dc018ff38494279bc4b0086190 [file] [log] [blame]
"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.

"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
# bz2 is an optional extension module: only remember whether it can be
# imported, and do not keep the module itself in this namespace.
try:
    import bz2
except ImportError:
    _BZ2_SUPPORTED = False
else:
    del bz2
    _BZ2_SUPPORTED = True

# pwd and grp may be missing on some platforms; the lookup functions are
# then replaced by None so that callers can test for their availability.
try:
    from pwd import getpwnam
except ImportError:
    getpwnam = None

try:
    from grp import getgrnam
except ImportError:
    getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
# Public API of the module.
# disk_usage is added later, if available on the platform.
__all__ = [
    # copying
    "copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
    "copytree", "move", "rmtree",
    # exceptions
    "Error", "SpecialFileError", "ExecError",
    # archive creation
    "make_archive", "get_archive_formats",
    "register_archive_format", "unregister_archive_format",
    # archive extraction
    "get_unpack_formats", "register_unpack_format",
    "unregister_unpack_format", "unpack_archive",
    # helpers
    "ignore_patterns", "chown",
]
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(EnvironmentError):
    """Generic error raised by the copy and move functions of this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
48
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
51
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read."""
54
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking registries
    fails."""
58
59
# WindowsError is only a builtin on Windows.  Elsewhere, bind the name to
# None so that code in this module can test for it without a NameError.
try:
    WindowsError
except NameError:
    WindowsError = None
64
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the data of file-like object fsrc into file-like object fdst.

    The data is read in chunks of at most `length` until fsrc.read()
    returns an empty (false) value.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
72
Johannes Gijsbers46f14592004-08-14 13:30:02 +000073def _samefile(src, dst):
74 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000075 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000076 try:
77 return os.path.samefile(src, dst)
78 except OSError:
79 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000080
81 # All other platforms: check for same pathname.
82 return (os.path.normcase(os.path.abspath(src)) ==
83 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000084
def copyfile(src, dst, symlinks=False):
    """Copy the contents of the file `src` to the file `dst`.

    If the optional flag `symlinks` is set and `src` is a symbolic link,
    `dst` is created as a new symlink with the same target instead of
    receiving a copy of the file the link points to.

    Raises Error when `src` and `dst` are the same file and
    SpecialFileError when either of them is a named pipe.

    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for candidate in (src, dst):
        try:
            info = os.stat(candidate)
        except OSError:
            # File most likely does not exist: nothing to check.
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % candidate)

    if symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return

    with open(src, 'rb') as source, open(dst, 'wb') as target:
        copyfileobj(source, target)
Guido van Rossumc6360141990-10-13 19:23:40 +0000112
def copymode(src, dst, symlinks=False):
    """Copy the permission bits of `src` onto `dst`.

    If the optional flag `symlinks` is set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks.  In that case, when `lchmod`
    isn't available (eg. Linux), this function does nothing.  It also does
    nothing when the platform has no `os.chmod`.

    """
    operate_on_links = (symlinks and os.path.islink(src)
                        and os.path.islink(dst))
    if operate_on_links:
        if not hasattr(os, 'lchmod'):
            return
        read_stat, set_mode = os.lstat, os.lchmod
    else:
        if not hasattr(os, 'chmod'):
            return
        read_stat, set_mode = os.stat, os.chmod

    source_info = read_stat(src)
    set_mode(dst, stat.S_IMODE(source_info.st_mode))
133
def copystat(src, dst, symlinks=False):
    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.

    If the optional flag `symlinks` is set, symlinks aren't followed if and
    only if both `src` and `dst` are symlinks.  Any of the underlying os
    functions that is missing on the platform is silently skipped.

    """
    def _skip(*args):
        pass

    def _os_function(name):
        # Fall back to a no-op when the platform lacks the function.
        return getattr(os, name, _skip)

    if symlinks and os.path.islink(src) and os.path.islink(dst):
        read_stat = os.lstat
        set_times = _os_function('lutimes')
        set_mode = _os_function('lchmod')
        set_flags = _os_function('lchflags')
    else:
        read_stat = os.stat
        set_times = _os_function('utime')
        set_mode = _os_function('chmod')
        set_flags = _os_function('chflags')

    info = read_stat(src)
    permissions = stat.S_IMODE(info.st_mode)
    set_times(dst, (info.st_atime, info.st_mtime))
    set_mode(dst, permissions)
    if not hasattr(info, 'st_flags'):
        return
    try:
        set_flags(dst, info.st_flags)
    except OSError as why:
        # Only an "operation not supported" failure is tolerated.
        tolerated = (hasattr(errno, 'EOPNOTSUPP') and
                     why.errno == errno.EOPNOTSUPP)
        if not tolerated:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000166
def copy(src, dst, symlinks=False):
    """Copy data and mode bits ("cp src dst").

    The destination may be a directory, in which case the file is copied
    into it under the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copymode(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000180
def copy2(src, dst, symlinks=False):
    """Copy data and all stat info ("cp -p src dst").

    The destination may be a directory, in which case the file is copied
    into it under the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".

    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copystat(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000194
def ignore_patterns(*patterns):
    """Build a function usable as the copytree() `ignore` parameter.

    `patterns` are glob-style patterns; the returned callable, given a
    directory path and the names it contains, returns the set of names
    matching at least one of them."""
    def _ignore_patterns(path, names):
        excluded = set()
        for glob in patterns:
            excluded.update(fnmatch.filter(names, glob))
        return excluded
    return _ignore_patterns
206
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    linkto = os.readlink(srcname)
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, symlinks=symlinks)
                else:
                    # Ignore a dangling symlink if the flag is on.
                    # os.path.exists() follows the link itself, so a relative
                    # target is resolved against the link's own directory
                    # rather than against the current working directory.
                    if (ignore_dangling_symlinks and
                            not os.path.exists(srcname)):
                        continue
                    # Otherwise let the copy occur; copy2 will raise an error
                    # for a dangling link.
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate every option, including ignore_dangling_symlinks,
                # so that nested directories behave like the top level.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append (not extend): each entry of `errors` is one 3-tuple.
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000292
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir
    (or os.path.islink when `path` is a symbolic link);
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always invoked from an except clause below: re-raise the
            # exception currently being handled.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            child_mode = os.lstat(child).st_mode
        except os.error:
            # Treat an entry that cannot be stat'ed as a plain file.
            child_mode = 0
        if stat.S_ISDIR(child_mode):
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000340
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000341
Christian Heimesada8c3b2008-03-18 18:26:33 +0000342def _basename(path):
343 # A basename() variant which first strips the trailing slash, if present.
344 # Thus we always get the last component of the path, even for directories.
345 return os.path.basename(path.rstrip(os.path.sep))
346
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed. Symlinks are
    recreated under the new name if os.rename() fails because of cross
    filesystem renames.

    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # rename() failed: fall back to copy-then-delete.
        if os.path.islink(src):
            os.symlink(os.readlink(src), target)
            os.unlink(src)
        elif not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
        else:
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, target, symlinks=True)
            rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000393
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000394def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000395 src = abspath(src)
396 dst = abspath(dst)
397 if not src.endswith(os.path.sep):
398 src += os.path.sep
399 if not dst.endswith(os.path.sep):
400 dst += os.path.sep
401 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000402
403def _get_gid(name):
404 """Returns a gid, given a group name."""
405 if getgrnam is None or name is None:
406 return None
407 try:
408 result = getgrnam(name)
409 except KeyError:
410 result = None
411 if result is not None:
412 return result[2]
413 return None
414
415def _get_uid(name):
416 """Returns an uid, given a user name."""
417 if getpwnam is None or name is None:
418 return None
419 try:
420 result = getpwnam(name)
421 except KeyError:
422 result = None
423 if result is not None:
424 return result[2]
425 return None
426
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (when the bz2 module
    is available), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    If 'dry_run' is true, nothing is written to disk.  'logger', when
    given, receives informational messages.  'verbose' is accepted but not
    used by this function.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # Reject unknown or unsupported compression names up front.
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory part: the archive
    # then goes into the current directory and there is nothing to create
    # (os.makedirs('') would raise).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership of each member only for the ids that resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
488
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` with the external "zip" program.

    The command is run through distutils' spawn(); `dry_run` is forwarded
    to it.  `verbose` selects "-r" instead of the quiet "-rq" options.
    Raises ExecError if the command could not be run successfully.
    """
    # XXX see if we want to keep an external call here
    zipoptions = "-r" if verbose else "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is handed to ExecError;
        # applying % to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
505
506def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
507 """Create a zip file from all the files under 'base_dir'.
508
Éric Araujo4433a5f2010-12-15 20:26:30 +0000509 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000510 "zipfile" Python module (if available) or the InfoZIP "zip" utility
511 (if installed and found on the default search path). If neither tool is
512 available, raises ExecError. Returns the name of the output zip
513 file.
514 """
515 zip_filename = base_name + ".zip"
516 archive_dir = os.path.dirname(base_name)
517
518 if not os.path.exists(archive_dir):
519 if logger is not None:
520 logger.info("creating %s", archive_dir)
521 if not dry_run:
522 os.makedirs(archive_dir)
523
524 # If zipfile module is not available, try spawning an external 'zip'
525 # command.
526 try:
527 import zipfile
528 except ImportError:
529 zipfile = None
530
531 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000532 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000533 else:
534 if logger is not None:
535 logger.info("creating '%s' and adding '%s' to it",
536 zip_filename, base_dir)
537
538 if not dry_run:
539 zip = zipfile.ZipFile(zip_filename, "w",
540 compression=zipfile.ZIP_DEFLATED)
541
542 for dirpath, dirnames, filenames in os.walk(base_dir):
543 for name in filenames:
544 path = os.path.normpath(os.path.join(dirpath, name))
545 if os.path.isfile(path):
546 zip.write(path, path)
547 if logger is not None:
548 logger.info("adding '%s'", path)
549 zip.close()
550
551 return zip_filename
552
# Registry of archive creators.  Maps a format name to a tuple
# (function, extra keyword arguments as (name, value) pairs, description).
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
}

# bzip2'ed tarballs can only be offered when the bz2 module is importable.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (
        _make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file")
562
def get_archive_formats():
    """Return the list of formats supported for creating archives.

    Each element of the returned list is a tuple (name, description); the
    list is sorted.
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
572
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format.

    `name` is the name of the format.  `function` is the callable that will
    be used to create archives.  If provided, `extra_args` is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    `description` can be provided to describe the format, and will be
    returned by the get_archive_formats() function.

    Raises TypeError if `function` is not callable or if `extra_args` is
    not a list/tuple of 2-element lists/tuples.
    """
    if extra_args is None:
        extra_args = []
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')

    def _is_pair(item):
        return isinstance(item, (tuple, list)) and len(item) == 2

    if not all(_is_pair(item) for item in extra_args):
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
593
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
596
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar" (or any name added with register_archive_format()).  An
    unknown format raises ValueError.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'dry_run' and 'logger' are forwarded to the archiving function;
    'verbose' is accepted but not forwarded.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    func = format_info[0]
    kwargs = {'dry_run': dry_run, 'logger': logger}
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    if base_dir is None:
        base_dir = os.curdir

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Must be made absolute before the chdir, or it would be
        # interpreted relative to root_dir.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000650
651
def get_unpack_formats():
    """Return the list of formats supported for unpacking.

    Each element of the returned list is a tuple
    (name, extensions, description); the list is sorted.
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
662
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is not inspected.
    """
    # first make sure no other unpacker is registered for this extension
    owner_of = {}
    for format_name, info in _UNPACK_FORMATS.items():
        owner_of.update((ext, format_name) for ext in info[0])

    for extension in extensions:
        if extension not in owner_of:
            continue
        msg = '%s is already registered for "%s"'
        raise RegistryError(msg % (extension, owner_of[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
679
680
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be used to unpack archives. The
    callable will receive archives to unpack. If it's unable to handle an
    archive, it needs to raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as arguments to the callable. `description` can be
    provided to describe the format, and will be returned by the
    get_unpack_formats() function.
    """
    effective_args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, effective_args)
    _UNPACK_FORMATS[name] = (extensions, function, effective_args,
                             description)
702
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]
706
def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists.

    Missing intermediate directories are created as needed.  A `path`
    without any directory component (a bare file name) refers to the
    current directory, so nothing is created in that case.
    """
    parent = os.path.dirname(path)
    # os.makedirs('') raises, so skip bare file names.  exist_ok=True keeps
    # the call from failing if the directory appears between the isdir()
    # check and the creation.
    if parent and not os.path.isdir(parent):
        os.makedirs(parent, exist_ok=True)
712
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Members whose name is absolute or contains '..' are silently skipped so
    that nothing is written outside `extract_dir`.  Entries whose name ends
    with '/' only cause the corresponding directory to be created.

    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            name = info.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if name.endswith('/'):
                # directory entry: creating it above is all there is to do
                continue

            # Open the member through its ZipInfo (not its name) and stream
            # it to disk, so large members are never held in memory at once.
            with archive.open(info) as source, open(target, 'wb') as sink:
                copyfileobj(source, sink)
749
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` (tar/tar.gz/tar.bz2) into
    `extract_dir`.

    Raises ReadError when tarfile cannot open `filename` as a tar archive.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        message = ("%s is not a compressed or uncompressed tar file"
                   % filename)
        raise ReadError(message)

    # always release the archive, even when extraction fails part-way
    try:
        archive.extractall(extract_dir)
    finally:
        archive.close()
762
# Registry of unpack formats.  Each entry maps a format name to a tuple
# (extensions, unpack function, extra_args, description).
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

if _BZ2_SUPPORTED:
    # Only claim the tar-specific suffixes: a bare '.bz2' would also match
    # plain bzip2-compressed files that are not tar archives, and would
    # prevent anyone from registering an unpacker for them.
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")
772
def _find_unpack_format(filename):
    """Return the name of the first registered unpack format that has an
    extension `filename` ends with, or None when no format matches.
    """
    for fmt_name, fmt_info in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in fmt_info[0]):
            return fmt_name
    return None
779
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the name of a registered archive format, such as "zip",
    "tar" or "gztar". If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    A ValueError is raised when `format` is given but no such format is
    registered. A ReadError is raised when `format` is not given and no
    registered unpacker matches the extension of `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]
    else:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

    # format_info is (extensions, function, extra_args, description); the
    # extra_args (name, value) pairs become keyword arguments of the unpacker
    func, extra_args = format_info[1], format_info[2]
    func(filename, extract_dir, **dict(extra_args))
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200815
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200816
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', which are the amount of total, used and free space, in
        bytes.
        """
        vfs = os.statvfs(path)
        unit = vfs.f_frsize
        return _ntuple_diskusage(
            total=vfs.f_blocks * unit,
            used=(vfs.f_blocks - vfs.f_bfree) * unit,
            free=vfs.f_bavail * unit)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The returned value is a named tuple with attributes 'total', 'used'
        and 'free', which are the amount of total, used and free space, in
        bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200849
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200850
def chown(path, user=None, group=None):
    """Change owner user and group of the given path.

    user and group can be the uid/gid or the user/group names, and in that
    case, they are converted to their respective uid/gid.  Passing None for
    one of them leaves that part of the ownership unchanged.

    Raises ValueError if both user and group are None, and LookupError if a
    given user or group name cannot be resolved.
    """

    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    _user = user
    _group = group

    # -1 means don't change it
    if user is None:
        _user = -1
    # user can either be an int (the uid) or a string (the system username)
    elif isinstance(user, str):
        _user = _get_uid(user)
        if _user is None:
            raise LookupError("no such user: {!r}".format(user))

    if group is None:
        _group = -1
    # same rule as for user: only names (strings) are looked up, anything
    # else is handed to os.chown() as the gid
    elif isinstance(group, str):
        _group = _get_gid(group)
        if _group is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, _user, _group)
Antoine Pitroubcf2b592012-02-08 23:28:36 +0100881
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None or os.get_terminal_size
            # is missing; ValueError: the stream is closed or detached;
            # OSError: not a terminal or the query is unsupported.
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))