blob: 9625d362330418cd3e2fbac5f93a1e4fd60eee7a [file] [log] [blame]
Tarek Ziadéc3399782010-02-23 05:39:18 +00001"""Utility functions for copying and archiving files and directory trees.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00002
Guido van Rossum959fa011999-08-18 20:03:17 +00003XXX The functions here don't copy the resource fork or other metadata on Mac.
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00004
5"""
Guido van Rossumc6360141990-10-13 19:23:40 +00006
Guido van Rossumc96207a1992-03-31 18:55:40 +00007import os
Guido van Rossum83c03e21999-02-23 23:07:51 +00008import sys
Guido van Rossum9d0a3df1997-04-29 14:45:19 +00009import stat
Brett Cannon1c3fa182004-06-19 21:11:35 +000010from os.path import abspath
Georg Brandl2ee470f2008-07-16 12:55:28 +000011import fnmatch
Tarek Ziadé396fad72010-02-23 05:30:31 +000012import collections
Antoine Pitrou910bd512010-03-22 20:11:09 +000013import errno
Tarek Ziadé6ac91722010-04-28 17:51:36 +000014import tarfile
Tarek Ziadé396fad72010-02-23 05:30:31 +000015
16try:
Tarek Ziadéffa155a2010-04-29 13:34:35 +000017 import bz2
Florent Xicluna54540ec2011-11-04 08:29:17 +010018 del bz2
Tarek Ziadéffa155a2010-04-29 13:34:35 +000019 _BZ2_SUPPORTED = True
20except ImportError:
21 _BZ2_SUPPORTED = False
22
23try:
Tarek Ziadé396fad72010-02-23 05:30:31 +000024 from pwd import getpwnam
25except ImportError:
26 getpwnam = None
27
28try:
29 from grp import getgrnam
30except ImportError:
31 getgrnam = None
Guido van Rossumc6360141990-10-13 19:23:40 +000032
Tarek Ziadéc3399782010-02-23 05:39:18 +000033__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
34 "copytree", "move", "rmtree", "Error", "SpecialFileError",
35 "ExecError", "make_archive", "get_archive_formats",
Tarek Ziadé6ac91722010-04-28 17:51:36 +000036 "register_archive_format", "unregister_archive_format",
37 "get_unpack_formats", "register_unpack_format",
Éric Araujoc5efe652011-08-21 14:30:00 +020038 "unregister_unpack_format", "unpack_archive",
Éric Araujo0ac4a5d2011-09-01 08:31:51 +020039 "ignore_patterns", "chown"]
Éric Araujoe4d5b8e2011-08-08 16:51:11 +020040 # disk_usage is added later, if available on the platform
Martin v. Löwise9ce0b02002-10-07 13:23:24 +000041
class Error(EnvironmentError):
    """Base error raised by the high-level file operations in this module."""
Guido van Rossumc6360141990-10-13 19:23:40 +000044
class SpecialFileError(EnvironmentError):
    """Raised when an operation (e.g. copying) is attempted on a special
    file (e.g. a named pipe) that does not support it."""
48
class ExecError(EnvironmentError):
    """Raised when an external command could not be executed."""
51
class ReadError(EnvironmentError):
    """Raised when an archive cannot be read."""
54
class RegistryError(Exception):
    """Raised when an operation on the archiving or unpacking
    registries fails."""
58
59
# Probe for the WindowsError builtin.  When the name is not defined, bind it
# to None so that later code can test `WindowsError is not None` before
# using it in an isinstance() check.
try:
    WindowsError
except NameError:
    WindowsError = None
64
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy the contents of file-like object fsrc to file-like object fdst.

    Data is read from fsrc in chunks of at most `length` and written to
    fdst until a read returns an empty result.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
72
Johannes Gijsbers46f14592004-08-14 13:30:02 +000073def _samefile(src, dst):
74 # Macintosh, Unix.
Tarek Ziadé1eab9cc2010-04-19 21:19:57 +000075 if hasattr(os.path, 'samefile'):
Johannes Gijsbersf9a098e2004-08-14 14:51:01 +000076 try:
77 return os.path.samefile(src, dst)
78 except OSError:
79 return False
Johannes Gijsbers46f14592004-08-14 13:30:02 +000080
81 # All other platforms: check for same pathname.
82 return (os.path.normcase(os.path.abspath(src)) ==
83 os.path.normcase(os.path.abspath(dst)))
Tim Peters495ad3c2001-01-15 01:36:40 +000084
def copyfile(src, dst, symlinks=False):
    """Copy the data of the file `src` to the file `dst`.

    When `symlinks` is true and `src` is a symbolic link, `dst` is created
    as a symlink with the same target instead of receiving a copy of the
    data the link points to.

    Raises Error if `src` and `dst` are the same file, and SpecialFileError
    if either of them is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for path in (src, dst):
        try:
            mode = os.stat(path).st_mode
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(mode):
            raise SpecialFileError("`%s` is a named pipe" % path)

    if symlinks and os.path.islink(src):
        os.symlink(os.readlink(src), dst)
        return

    with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
        copyfileobj(fsrc, fdst)
Guido van Rossumc6360141990-10-13 19:23:40 +0000112
def copymode(src, dst, symlinks=False):
    """Copy the permission bits of `src` onto `dst`.

    When `symlinks` is true and both `src` and `dst` are symbolic links,
    the links themselves are used (lstat/lchmod) rather than their targets.
    If the chmod function needed for the chosen case is not provided by
    the os module, nothing is done.
    """
    if symlinks and os.path.islink(src) and os.path.islink(dst):
        stat_func = os.lstat
        chmod_func = getattr(os, 'lchmod', None)
    else:
        stat_func = os.stat
        chmod_func = getattr(os, 'chmod', None)

    if chmod_func is None:
        return

    source_mode = stat_func(src).st_mode
    chmod_func(dst, stat.S_IMODE(source_mode))
133
def copystat(src, dst, symlinks=False):
    """Copy stat information (mode bits, atime, mtime, flags) from src to dst.

    When `symlinks` is true and both `src` and `dst` are symbolic links,
    the link-level functions (lstat, lutimes, lchmod, lchflags) are used so
    the links are not followed.  Any of the setter functions missing from
    the os module is silently replaced by a no-op.
    """
    def _nop(*args, ns=None):
        pass

    def _os_func(name):
        # Fall back to the no-op when the platform lacks the function.
        return getattr(os, name, _nop)

    if symlinks and os.path.islink(src) and os.path.islink(dst):
        stat_func = os.lstat
        utime_func = _os_func('lutimes')
        chmod_func = _os_func('lchmod')
        chflags_func = _os_func('lchflags')
    else:
        stat_func = os.stat
        utime_func = _os_func('utime')
        chmod_func = _os_func('chmod')
        chflags_func = _os_func('chflags')

    st = stat_func(src)
    utime_func(dst, ns=(st.st_atime_ns, st.st_mtime_ns))
    chmod_func(dst, stat.S_IMODE(st.st_mode))

    if not hasattr(st, 'st_flags'):
        return
    try:
        chflags_func(dst, st.st_flags)
    except OSError as why:
        # An "operation not supported" failure is tolerated; anything else
        # is propagated to the caller.
        tolerated = [getattr(errno, err)
                     for err in ('EOPNOTSUPP', 'ENOTSUP')
                     if hasattr(errno, err)]
        if why.errno not in tolerated:
            raise
Guido van Rossum9d0a3df1997-04-29 14:45:19 +0000168
def copy(src, dst, symlinks=False):
    """Copy file data and mode bits ("cp src dst").

    `dst` may be a directory, in which case the file is copied into it
    under the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copymode(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000182
def copy2(src, dst, symlinks=False):
    """Copy file data and all stat info ("cp -p src dst").

    `dst` may be a directory, in which case the file is copied into it
    under the base name of `src`.

    If the optional flag `symlinks` is set, symlinks won't be followed. This
    resembles GNU's "cp -P src dst".
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target, symlinks=symlinks)
    copystat(src, target, symlinks=symlinks)
Guido van Rossumc6360141990-10-13 19:23:40 +0000196
def ignore_patterns(*patterns):
    """Build a callable suitable for the `ignore` parameter of copytree().

    `patterns` are glob-style patterns; the returned callable takes
    (path, names) and returns the set of names matching any pattern.
    """
    def _ignore_patterns(path, names):
        return {name
                for pattern in patterns
                for name in fnmatch.filter(names, pattern)}
    return _ignore_patterns
208
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons; each
    reason is a (srcname, dstname, message) tuple.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. The flag is applied to every level of
    the tree. Notice that this has no effect on platforms that don't
    support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    # We can't just leave it to `copy_function` because legacy
                    # code with a custom `copy_function` may rely on copytree
                    # doing the right thing.
                    os.symlink(linkto, dstname)
                    copystat(srcname, dstname, symlinks=symlinks)
                else:
                    # A relative link target is relative to the directory
                    # holding the link, not to the current directory
                    # (os.path.join leaves an absolute target untouched).
                    target = os.path.join(os.path.dirname(srcname), linkto)
                    # ignore dangling symlink if the flag is on
                    if ignore_dangling_symlinks and not os.path.exists(target):
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Propagate ignore_dangling_symlinks so the flag also applies
                # to nested directories.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append (not extend) so the entry stays a single 3-tuple like
            # every other element of `errors`
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
Guido van Rossumd7673291998-02-06 21:38:09 +0000294
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete the directory tree rooted at `path`.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.path.islink, os.listdir, os.remove,
    or os.rmdir; path is the argument to that function that caused it to
    fail; and exc_info is a tuple returned by sys.exc_info().  If
    ignore_errors is false and onerror is None, the exception is raised.

    Calling rmtree on a symbolic link is refused: the error is reported
    through the same mechanism and nothing is removed.
    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Only ever called from inside an `except` block, so a bare
            # raise re-raises the exception being handled there.
            raise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    try:
        entries = os.listdir(path)
    except os.error:
        entries = []
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            is_directory = stat.S_ISDIR(os.lstat(child).st_mode)
        except os.error:
            # Could not stat the entry: treat it as a non-directory.
            is_directory = False

        if is_directory:
            rmtree(child, ignore_errors, onerror)
            continue

        try:
            os.remove(child)
        except os.error:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())
Guido van Rossumd7673291998-02-06 21:38:09 +0000342
Martin v. Löwise9ce0b02002-10-07 13:23:24 +0000343
Christian Heimesada8c3b2008-03-18 18:26:33 +0000344def _basename(path):
345 # A basename() variant which first strips the trailing slash, if present.
346 # Thus we always get the last component of the path, even for directories.
347 return os.path.basename(path.rstrip(os.path.sep))
348
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The resulting path inside that directory
    must not already exist, otherwise Error is raised.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    os.rename() is tried first. If it fails with OSError, the source is
    copied to the destination and then removed: a symlink is recreated
    under the new name, a directory is copied with copytree() and removed
    with rmtree(), and any other file is copied with copy2() and unlinked.

    A lot more could be done here... A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        if os.path.islink(src):
            os.symlink(os.readlink(src), target)
            os.unlink(src)
        elif not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
        else:
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, target, symlinks=True)
            rmtree(src)
Brett Cannon1c3fa182004-06-19 21:11:35 +0000395
Benjamin Peterson247a9b82009-02-20 04:09:19 +0000396def _destinsrc(src, dst):
Antoine Pitrou0dcc3cd2009-01-29 20:26:59 +0000397 src = abspath(src)
398 dst = abspath(dst)
399 if not src.endswith(os.path.sep):
400 src += os.path.sep
401 if not dst.endswith(os.path.sep):
402 dst += os.path.sep
403 return dst.startswith(src)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000404
405def _get_gid(name):
406 """Returns a gid, given a group name."""
407 if getgrnam is None or name is None:
408 return None
409 try:
410 result = getgrnam(name)
411 except KeyError:
412 result = None
413 if result is not None:
414 return result[2]
415 return None
416
417def _get_uid(name):
418 """Returns an uid, given a user name."""
419 if getpwnam is None or name is None:
420 return None
421 try:
422 result = getpwnam(name)
423 except KeyError:
424 result = None
425 if result is not None:
426 return result[2]
427 return None
428
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2" (only when bz2 support
    is available), or None; any other value raises ValueError.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, or if the name cannot be
    resolved to an id, the ownership recorded by tarfile is left unchanged.

    'verbose' is accepted for interface compatibility and is not used.
    When 'dry_run' is true nothing is written to disk.  'logger', if given,
    receives info() messages about the steps performed.

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # archive_dir is '' when base_name has no directory component; the
    # archive then goes into the current directory and there is nothing to
    # create (os.makedirs('') would raise).
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # tarfile filter: override ownership only for the ids that resolved
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
490
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create `zip_filename` from `base_dir` by spawning the external "zip"
    command.

    The command is run recursively ("-r"), and quietly ("-rq") unless
    `verbose` is true; `dry_run` is forwarded to distutils' spawn().

    Raises ExecError if spawning the command fails.
    """
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The message must be formatted *before* it is handed to ExecError;
        # applying % to the exception instance raises TypeError instead.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
507
508def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
509 """Create a zip file from all the files under 'base_dir'.
510
Éric Araujo4433a5f2010-12-15 20:26:30 +0000511 The output zip file will be named 'base_name' + ".zip". Uses either the
Tarek Ziadé396fad72010-02-23 05:30:31 +0000512 "zipfile" Python module (if available) or the InfoZIP "zip" utility
513 (if installed and found on the default search path). If neither tool is
514 available, raises ExecError. Returns the name of the output zip
515 file.
516 """
517 zip_filename = base_name + ".zip"
518 archive_dir = os.path.dirname(base_name)
519
520 if not os.path.exists(archive_dir):
521 if logger is not None:
522 logger.info("creating %s", archive_dir)
523 if not dry_run:
524 os.makedirs(archive_dir)
525
526 # If zipfile module is not available, try spawning an external 'zip'
527 # command.
528 try:
529 import zipfile
530 except ImportError:
531 zipfile = None
532
533 if zipfile is None:
Tarek Ziadée2124162010-04-21 13:35:21 +0000534 _call_external_zip(base_dir, zip_filename, verbose, dry_run)
Tarek Ziadé396fad72010-02-23 05:30:31 +0000535 else:
536 if logger is not None:
537 logger.info("creating '%s' and adding '%s' to it",
538 zip_filename, base_dir)
539
540 if not dry_run:
541 zip = zipfile.ZipFile(zip_filename, "w",
542 compression=zipfile.ZIP_DEFLATED)
543
544 for dirpath, dirnames, filenames in os.walk(base_dir):
545 for name in filenames:
546 path = os.path.normpath(os.path.join(dirpath, name))
547 if os.path.isfile(path):
548 zip.write(path, path)
549 if logger is not None:
550 logger.info("adding '%s'", path)
551 zip.close()
552
553 return zip_filename
554
# Registry of archive formats, keyed by format name.  Each value is a tuple
# (function, extra_args, description): the callable that builds the archive,
# a list of (name, value) pairs passed to it as keyword arguments, and a
# human-readable description.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file")
    }

# The bzip2 variant is only offered when the bz2 module could be imported.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                "bzip2'ed tar-file")
564
def get_archive_formats():
    """Return the registered archive formats as a sorted list.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((name, registry[2])
                  for name, registry in _ARCHIVE_FORMATS.items())
574
def register_archive_format(name, function, extra_args=None, description=''):
    """Register an archive format.

    `name` is the name of the format and `function` the callable used to
    create archives.  `extra_args`, if provided, is a list or tuple of
    (name, value) pairs that will be passed as arguments to the callable.
    `description` describes the format and is returned by
    get_archive_formats().

    Raises TypeError if `function` is not callable, if `extra_args` is not
    a list or tuple, or if one of its elements is not a 2-item list/tuple.
    """
    if extra_args is None:
        extra_args = []

    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')

    pairs_ok = all(isinstance(element, (tuple, list)) and len(element) == 2
                   for element in extra_args)
    if not pairs_ok:
        raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
595
def unregister_archive_format(name):
    """Remove the archive format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
598
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the name of a registered archive format (e.g.
    "zip", "tar", "bztar" or "gztar").  An unregistered format raises
    ValueError before anything else is done.

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are passed to the archiver for every format other
    than "zip".  'logger', if given, receives debug() messages about
    directory changes and is forwarded to the archiver.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process chdir'ed into root_dir.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make base_name absolute first: it must keep pointing at the same
        # place once the working directory has changed.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
Tarek Ziadé6ac91722010-04-28 17:51:36 +0000652
653
def get_unpack_formats():
    """Return the registered unpack formats as a sorted list.

    Each element of the returned list is a tuple
    (name, extensions, description).
    """
    return sorted((name, info[0], info[3])
                  for name, info in _UNPACK_FORMATS.items())
664
def _check_unpack_options(extensions, function, extra_args):
    """Validate what is about to be registered as an unpacker.

    Raises RegistryError if one of `extensions` already belongs to a
    registered format, and TypeError if `function` is not callable.
    `extra_args` is accepted but not inspected.
    """
    # Map every extension that is already taken to the format owning it.
    owners = {ext: name
              for name, info in _UNPACK_FORMATS.items()
              for ext in info[0]}

    for extension in extensions:
        if extension in owners:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension, owners[extension]))

    if not callable(function):
        raise TypeError('The registered function must be a callable')
681
682
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be used to unpack archives. The
    callable will receive archives to unpack. If it's unable to handle an
    archive, it needs to raise a ReadError exception.

    If provided, `extra_args` is a sequence of (name, value) tuples that
    will be passed as arguments to the callable. `description` can be
    provided to describe the format, and will be returned by the
    get_unpack_formats() function.

    The arguments are validated by _check_unpack_options() before the
    format is stored.
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
704
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]
708
709def _ensure_directory(path):
710 """Ensure that the parent directory of `path` exists"""
711 dirname = os.path.dirname(path)
712 if not os.path.isdir(dirname):
713 os.makedirs(dirname)
714
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` into the directory `extract_dir`.

    Raises ReadError if the zipfile module cannot be imported or if
    `filename` is not a zip file.  Members whose name starts with '/' or
    contains '..' are silently skipped.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for member in archive.infolist():
            member_name = member.filename

            # skip absolute names and anything containing '..'
            if member_name.startswith('/') or '..' in member_name:
                continue

            destination = os.path.join(extract_dir, *member_name.split('/'))
            if not destination:
                continue

            _ensure_directory(destination)
            if member_name.endswith('/'):
                # directory entry: there is no file content to write
                continue

            # read the member before opening the destination, so a failed
            # read does not leave an empty file behind
            payload = archive.read(member.filename)
            with open(destination, 'wb') as out:
                out.write(payload)
            del payload
751
def _unpack_tarfile(filename, extract_dir):
    """Extract the tar archive `filename` into the directory `extract_dir`.

    Used for the plain, gzip'ed and bzip2'ed tar formats.  Raises
    ReadError if tarfile is unable to open `filename`.
    """
    try:
        archive = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)

    # the archive is closed whether or not the extraction succeeds
    with archive:
        archive.extractall(extract_dir)
764
# Registry of the known unpack formats, keyed by format name.  Each value
# is an (extensions, unpack function, extra_args, description) tuple.
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file"),
    }

if _BZ2_SUPPORTED:
    # '.tar.bz2' and '.tbz2' are the usual names of bzip2'ed tar files
    # (compare '.tar.gz' and '.tgz' above).  The bare '.bz2' entry is kept
    # so that every file name recognized before is still recognized.
    _UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2', '.bz2'],
                                _unpack_tarfile, [], "bzip2'ed tar-file")
774
def _find_unpack_format(filename):
    """Return the name of a registered unpack format matching `filename`.

    A format matches when `filename` ends with one of its extensions.
    None is returned when no registered format matches.
    """
    for fmt_name, fmt_info in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in fmt_info[0]):
            return fmt_name
    return None
781
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the name of a registered unpack format, for instance
    "zip", "tar" or "gztar".  If not provided, unpack_archive will use
    the filename extension and see if an unpacker was registered for that
    extension.

    A ValueError is raised if `format` is given but no such format is
    registered.  A ReadError is raised if `format` is not given and no
    registered extension matches `filename`.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is not None:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))
    else:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))
        format_info = _UNPACK_FORMATS[format]

    # format_info is (extensions, function, extra_args, description);
    # extra_args is a sequence of (name, value) pairs handed to the
    # unpacker as keyword arguments.
    func = format_info[1]
    kwargs = dict(format_info[2])
    func(filename, extract_dir, **kwargs)
Giampaolo Rodola'210e7ca2011-07-01 13:55:36 +0200817
Éric Araujoe4d5b8e2011-08-08 16:51:11 +0200818
if hasattr(os, 'statvfs'):

    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with the attributes 'total', 'used'
        and 'free', which are the amount of total, used and free space,
        in bytes.
        """
        vfs = os.statvfs(path)
        block_size = vfs.f_frsize
        total = vfs.f_blocks * block_size
        # 'used' is derived from f_bfree while 'free' is derived from
        # f_bavail, so used + free need not add up to total.
        used = (vfs.f_blocks - vfs.f_bfree) * block_size
        free = vfs.f_bavail * block_size
        return _ntuple_diskusage(total, used, free)

elif os.name == 'nt':

    import nt
    __all__.append('disk_usage')
    _ntuple_diskusage = collections.namedtuple('usage', 'total used free')

    def disk_usage(path):
        """Return disk usage statistics about the given path.

        The result is a named tuple with the attributes 'total', 'used'
        and 'free', which are the amount of total, used and free space,
        in bytes.
        """
        total, free = nt._getdiskusage(path)
        return _ntuple_diskusage(total, total - free, free)
Sandro Tosid902a142011-08-22 23:28:27 +0200851
Éric Araujo0ac4a5d2011-09-01 08:31:51 +0200852
def chown(path, user=None, group=None):
    """Change the owning user and/or group of the given path.

    `user` and `group` can each be a uid/gid or a user/group name; names
    are converted to their respective uid/gid.  An argument left as None
    leaves that part of the ownership unchanged.

    Raises ValueError if both `user` and `group` are None, and
    LookupError if a name cannot be resolved.
    """
    if user is None and group is None:
        raise ValueError("user and/or group must be set")

    # -1 tells os.chown not to change that id
    uid = -1 if user is None else user
    gid = -1 if group is None else group

    # a str user is the system username; any other value is passed on as is
    if isinstance(user, str):
        uid = _get_uid(user)
        if uid is None:
            raise LookupError("no such user: {!r}".format(user))

    # any group that is not an int is looked up as a group name
    if group is not None and not isinstance(group, int):
        gid = _get_gid(group)
        if gid is None:
            raise LookupError("no such group: {!r}".format(group))

    os.chown(path, uid, gid)
Antoine Pitroubcf2b592012-02-08 23:28:36 +0100883
def get_terminal_size(fallback=(80, 24)):
    """Get the size of the terminal window.

    For each of the two dimensions, the environment variable, COLUMNS
    and LINES respectively, is checked. If the variable is defined and
    the value is a positive integer, it is used.

    When COLUMNS or LINES is not defined, which is the common case,
    the terminal connected to sys.__stdout__ is queried
    by invoking os.get_terminal_size.

    If the terminal size cannot be successfully queried, either because
    the system doesn't support querying, or because we are not
    connected to a terminal, the value given in fallback parameter
    is used. Fallback defaults to (80, 24) which is the default
    size used by many terminal emulators.

    The value returned is a named tuple of type os.terminal_size.
    """
    # columns, lines are the working values
    try:
        columns = int(os.environ['COLUMNS'])
    except (KeyError, ValueError):
        columns = 0

    try:
        lines = int(os.environ['LINES'])
    except (KeyError, ValueError):
        lines = 0

    # only query if necessary
    if columns <= 0 or lines <= 0:
        try:
            size = os.get_terminal_size(sys.__stdout__.fileno())
        except (AttributeError, ValueError, OSError):
            # AttributeError: sys.__stdout__ is None, or the platform has
            #   no os.get_terminal_size
            # ValueError: the stream is closed
            # OSError: the query itself failed, e.g. not a terminal
            size = os.terminal_size(fallback)
        if columns <= 0:
            columns = size.columns
        if lines <= 0:
            lines = size.lines

    return os.terminal_size((columns, lines))
925 return os.terminal_size((columns, lines))