blob: 4a92f18cb91945b9107abb1636768a7d4c75616a [file] [log] [blame]
"""Common operations on Posix pathnames.

Instead of importing this module directly, import os and refer to
this module as os.path.  The "os.path" name is an alias for this
module on Posix systems; on other systems (e.g. Mac, Windows),
os.path provides the same operations in a manner specific to that
platform, and is an alias to another module (e.g. macpath, ntpath).

Some of this can actually be useful on non-Posix systems too, e.g.
for manipulation of the pathname component of URLs.
"""
Guido van Rossumc6360141990-10-13 19:23:40 +000012
Guido van Rossumd3876d31996-07-23 03:47:28 +000013import os
Guido van Rossum40d93041990-10-21 16:17:34 +000014import stat
Guido van Rossumc6360141990-10-13 19:23:40 +000015
# Public names exported by "from <this module> import *".
# "realpath" is defined in this module and is listed here with the
# other path operations.
__all__ = ["normcase", "isabs", "join", "splitdrive", "split", "splitext",
           "basename", "dirname", "commonprefix", "getsize", "getmtime",
           "getatime", "islink", "exists", "isdir", "isfile", "ismount",
           "walk", "expanduser", "expandvars", "normpath", "abspath",
           "realpath", "samefile", "sameopenfile", "samestat",
           "supports_unicode_filenames"]
Guido van Rossumc6360141990-10-13 19:23:40 +000021
Guido van Rossum7ac48781992-01-14 18:29:32 +000022# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
23# On MS-DOS this may also turn slashes into backslashes; however, other
24# normalizations (such as optimizing '../' away) are not allowed
25# (another function should be defined to do that).
26
def normcase(s):
    """Normalize the case of a pathname.

    On Posix this is a no-op: the argument is returned as-is."""
    return s
Guido van Rossum7ac48781992-01-14 18:29:32 +000030
31
Jeremy Hyltona05e2932000-06-28 14:48:01 +000032# Return whether a path is absolute.
Guido van Rossum7ac48781992-01-14 18:29:32 +000033# Trivial in Posix, harder on the Mac or MS-DOS.
34
def isabs(s):
    """Return true if the pathname s is absolute, i.e. begins with '/'."""
    return s.startswith('/')
Guido van Rossum7ac48781992-01-14 18:29:32 +000038
39
Barry Warsaw384d2491997-02-18 21:53:25 +000040# Join pathnames.
41# Ignore the previous parts if a part is absolute.
Guido van Rossum4d0fdc31991-08-16 13:27:58 +000042# Insert a '/' unless the first part is empty or already ends in '/'.
Guido van Rossum7ac48781992-01-14 18:29:32 +000043
def join(a, *p):
    """Join pathname components with '/' separators.

    A component that starts with '/' discards everything joined so far.
    No separator is inserted when the path built so far is empty or
    already ends in '/'."""
    result = a
    for part in p:
        if part.startswith('/'):
            # Absolute component: start over from it.
            result = part
        elif not result or result.endswith('/'):
            result = result + part
        else:
            result = result + '/' + part
    return result
Guido van Rossumc6360141990-10-13 19:23:40 +000055
56
Guido van Rossum26847381992-03-31 18:54:35 +000057# Split a path in head (everything up to the last '/') and tail (the
Guido van Rossuma89b1ba1995-09-01 20:32:21 +000058# rest). If the path ends in '/', tail will be empty. If there is no
59# '/' in the path, head will be empty.
60# Trailing '/'es are stripped from head unless it is the root.
Guido van Rossum7ac48781992-01-14 18:29:32 +000061
def split(p):
    """Split a pathname into the tuple (head, tail).

    tail is everything after the final slash and head is everything
    before it.  Either part may be empty.  Trailing slashes are removed
    from head unless head consists only of slashes."""
    cut = p.rfind('/') + 1
    head = p[:cut]
    tail = p[cut:]
    # strip('/') is non-empty only when head has a non-slash character;
    # an all-slash (or empty) head is kept untouched.
    if head.strip('/'):
        head = head.rstrip('/')
    return head, tail
Guido van Rossumc6360141990-10-13 19:23:40 +000071
72
Guido van Rossum4d0fdc31991-08-16 13:27:58 +000073# Split a path in root and extension.
Guido van Rossum422869a1996-08-20 20:24:17 +000074# The extension is everything starting at the last dot in the last
Guido van Rossum4d0fdc31991-08-16 13:27:58 +000075# pathname component; the root is everything before that.
Guido van Rossum7ac48781992-01-14 18:29:32 +000076# It is always true that root + ext == p.
77
def splitext(p):
    """Split the extension from a pathname.

    Returns "(root, ext)" such that root + ext == p.  ext is either
    empty or starts at the last dot of the last pathname component.
    Dots that lead the last component are not treated as extension
    separators, so splitext('.cshrc') returns ('.cshrc', '')."""
    sep_index = p.rfind('/')
    dot_index = p.rfind('.')
    if dot_index > sep_index:
        # The dot only starts an extension when the last component has
        # at least one non-dot character before it.
        name_start = sep_index + 1
        if p[name_start:dot_index].strip('.'):
            return p[:dot_index], p[dot_index:]
    return p, ''
Guido van Rossum4d0fdc31991-08-16 13:27:58 +000086
87
Guido van Rossum221df241995-08-07 20:17:55 +000088# Split a pathname into a drive specification and the rest of the
89# path. Useful on DOS/Windows/NT; on Unix, the drive is always empty.
90
def splitdrive(p):
    """Split a pathname into the tuple (drive, path).

    This module always returns an empty drive and the pathname
    unchanged."""
    drive = ''
    return drive, p
Guido van Rossum221df241995-08-07 20:17:55 +000095
96
Guido van Rossumc6360141990-10-13 19:23:40 +000097# Return the tail (basename) part of a path.
Guido van Rossum7ac48781992-01-14 18:29:32 +000098
def basename(p):
    """Return the final component of a pathname.

    This is everything after the last '/', which is empty when the
    pathname ends in a slash."""
    start = p.rfind('/') + 1
    return p[start:]
Guido van Rossumc6360141990-10-13 19:23:40 +0000102
103
Guido van Rossumc629d341992-11-05 10:43:02 +0000104# Return the head (dirname) part of a path.
105
def dirname(p):
    """Return the directory component of a pathname.

    This is everything up to the last '/', with trailing slashes removed
    unless the result consists only of slashes."""
    head = p[:p.rfind('/') + 1]
    # rstrip() yields '' for an all-slash head; fall back to head itself
    # in that case so that the root is preserved.
    return head.rstrip('/') or head
Guido van Rossumc629d341992-11-05 10:43:02 +0000109
110
Guido van Rossumc6360141990-10-13 19:23:40 +0000111# Return the longest prefix of all list elements.
Guido van Rossum7ac48781992-01-14 18:29:32 +0000112
def commonprefix(m):
    """Given a list of pathnames, return the longest common leading string.

    The comparison is done character by character, not per pathname
    component, so the result is not necessarily a valid path.  An empty
    list yields ''."""
    if not m:
        return ''
    # The smallest and largest items in sort order differ at the earliest
    # position of any pair, so comparing those two is enough.
    lowest = min(m)
    highest = max(m)
    for i in range(len(lowest)):
        if lowest[i] != highest[i]:
            return lowest[:i]
    return lowest
Guido van Rossumc6360141990-10-13 19:23:40 +0000124
125
Guido van Rossum2bc1f8f1998-07-24 20:49:26 +0000126# Get size, mtime, atime of files.
127
def getsize(filename):
    """Return the st_size field of os.stat(filename).

    Errors raised by os.stat() are not caught."""
    info = os.stat(filename)
    return info.st_size
Guido van Rossum2bc1f8f1998-07-24 20:49:26 +0000131
def getmtime(filename):
    """Return the st_mtime field of os.stat(filename).

    Errors raised by os.stat() are not caught."""
    info = os.stat(filename)
    return info.st_mtime
Guido van Rossum2bc1f8f1998-07-24 20:49:26 +0000135
def getatime(filename):
    """Return the st_atime field of os.stat(filename).

    Errors raised by os.stat() are not caught."""
    info = os.stat(filename)
    return info.st_atime
Guido van Rossum2bc1f8f1998-07-24 20:49:26 +0000139
140
Guido van Rossum7ac48781992-01-14 18:29:32 +0000141# Is a path a symbolic link?
Guido van Rossumd3876d31996-07-23 03:47:28 +0000142# This will always return false on systems where os.lstat doesn't exist.
Guido van Rossum7ac48781992-01-14 18:29:32 +0000143
def islink(path):
    """Return true if path is a symbolic link.

    Returns False when os.lstat() fails or is not available."""
    try:
        info = os.lstat(path)
    except (os.error, AttributeError):
        # AttributeError covers an os module without lstat().
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
Guido van Rossum7ac48781992-01-14 18:29:32 +0000151
152
153# Does a path exist?
154# This is false for dangling symbolic links.
155
def exists(path):
    """Return True if os.stat(path) succeeds, False otherwise.

    os.stat() follows symbolic links, so a broken link gives False."""
    try:
        os.stat(path)
    except os.error:
        return False
    else:
        return True
Guido van Rossumc6360141990-10-13 19:23:40 +0000163
164
Guido van Rossumd3876d31996-07-23 03:47:28 +0000165# Is a path a directory?
Guido van Rossum7ac48781992-01-14 18:29:32 +0000166# This follows symbolic links, so both islink() and isdir() can be true
167# for the same path.
168
def isdir(path):
    """Return true if path refers to a directory.

    Uses os.stat(), so symbolic links are followed.  Returns False when
    os.stat() fails."""
    try:
        info = os.stat(path)
    except os.error:
        return False
    else:
        return stat.S_ISDIR(info.st_mode)
Guido van Rossumc6360141990-10-13 19:23:40 +0000176
177
Guido van Rossum26847381992-03-31 18:54:35 +0000178# Is a path a regular file?
Guido van Rossumb6775db1994-08-01 11:34:53 +0000179# This follows symbolic links, so both islink() and isfile() can be true
Guido van Rossum7ac48781992-01-14 18:29:32 +0000180# for the same path.
181
def isfile(path):
    """Return true if path refers to a regular file.

    Uses os.stat(), so symbolic links are followed.  Returns False when
    os.stat() fails."""
    try:
        info = os.stat(path)
    except os.error:
        return False
    else:
        return stat.S_ISREG(info.st_mode)
Guido van Rossumc6360141990-10-13 19:23:40 +0000189
190
Guido van Rossumd3778f91991-11-12 15:37:40 +0000191# Are two filenames really pointing to the same file?
Guido van Rossum7ac48781992-01-14 18:29:32 +0000192
def samefile(f1, f2):
    """Return true if both pathnames refer to the same actual file.

    Each pathname is passed to os.stat() and the results are compared
    with samestat().  Errors raised by os.stat() are not caught."""
    return samestat(os.stat(f1), os.stat(f2))
Guido van Rossumd3778f91991-11-12 15:37:40 +0000198
199
200# Are two open files really referencing the same file?
201# (Not necessarily the same file descriptor!)
Guido van Rossum7ac48781992-01-14 18:29:32 +0000202
def sameopenfile(fp1, fp2):
    """Return true if two open files refer to the same actual file.

    fp1 and fp2 are each handed to os.fstat() and the results are
    compared with samestat().  Errors raised by os.fstat() are not
    caught."""
    return samestat(os.fstat(fp1), os.fstat(fp2))
Guido van Rossumd3778f91991-11-12 15:37:40 +0000208
209
210# Are two stat buffers (obtained from stat, fstat or lstat)
211# describing the same file?
Guido van Rossum7ac48781992-01-14 18:29:32 +0000212
def samestat(s1, s2):
    """Return true if two stat results describe the same file.

    Two results match when both their st_ino and st_dev fields are
    equal."""
    same_inode = s1.st_ino == s2.st_ino
    return same_inode and s1.st_dev == s2.st_dev
Guido van Rossumc6360141990-10-13 19:23:40 +0000217
218
219# Is a path a mount point?
Guido van Rossumd3876d31996-07-23 03:47:28 +0000220# (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)
Guido van Rossum7ac48781992-01-14 18:29:32 +0000221
def ismount(path):
    """Return True if path is a mount point, False otherwise.

    path counts as a mount point when path/.. lives on a different
    device than path, or when path/.. and path are the same i-node
    (which is the case for the root directory).  A symbolic link is
    never reported as a mount point, even if it points at one.  Returns
    False when path or path/.. cannot be examined."""
    # Use lstat() so the link itself is examined rather than its target;
    # fall back to stat() where the os module has no lstat().
    lstat = getattr(os, 'lstat', os.stat)
    try:
        s1 = lstat(path)
    except os.error:
        return False    # It doesn't exist -- so not a mount point
    if stat.S_ISLNK(s1.st_mode):
        return False    # A symlink can never be a mount point
    try:
        s2 = lstat(join(path, '..'))
    except os.error:
        return False
    if s1.st_dev != s2.st_dev:
        return True     # path/.. is on a different device than path
    if s1.st_ino == s2.st_ino:
        return True     # path/.. is the same i-node as path
    return False
Guido van Rossumc6360141990-10-13 19:23:40 +0000238
239
240# Directory tree walk.
Guido van Rossum7ac48781992-01-14 18:29:32 +0000241# For each directory under top (including top itself, but excluding
242# '.' and '..'), func(arg, dirname, filenames) is called, where
243# dirname is the name of the directory and filenames is the list
Guido van Rossum346f7af1997-12-05 19:04:51 +0000244# of files (and subdirectories etc.) in the directory.
Guido van Rossum7ac48781992-01-14 18:29:32 +0000245# The func may modify the filenames list, to implement a filter,
Guido van Rossumc6360141990-10-13 19:23:40 +0000246# or to impose a different order of visiting.
Guido van Rossum7ac48781992-01-14 18:29:32 +0000247
def walk(top, func, arg):
    """Walk a directory tree, calling func for every directory.

    For top and each directory below it, func(arg, dirname, fnames) is
    called.  dirname is the directory's path and fnames is the list
    returned by os.listdir() for it.  func may change fnames in place
    (e.g. via del or slice assignment); only the names still in the list
    afterwards are considered for recursion, in list order, which allows
    filtering or reordering the walk.  arg is passed through to func
    untouched.

    Directories that cannot be listed and entries that cannot be
    lstat()ed are skipped silently.  Because os.lstat() is used,
    symbolic links to directories are not descended into."""
    try:
        names = os.listdir(top)
    except os.error:
        return
    func(arg, top, names)
    # Iterate over the same list object handed to func so that its
    # in-place edits take effect.
    for entry in names:
        child = join(top, entry)
        try:
            info = os.lstat(child)
        except os.error:
            continue
        if stat.S_ISDIR(info.st_mode):
            walk(child, func, arg)
Guido van Rossum7ac48781992-01-14 18:29:32 +0000276
277
278# Expand paths beginning with '~' or '~user'.
279# '~' means $HOME; '~user' means that user's home directory.
280# If the path doesn't begin with '~', or if the user or $HOME is unknown,
281# the path is returned unchanged (leaving error reporting to whatever
282# function is called with the expanded path as argument).
283# See also module 'glob' for expansion of *, ? and [...] in pathnames.
284# (A function should also be defined to do full *sh-style environment
285# variable expansion.)
286
def expanduser(path):
    """Expand a leading ~ or ~user construction.

    '~' is replaced by $HOME or, when HOME is not set, by the current
    user's home directory from the password database.  '~user' is
    replaced by that user's home directory.  If the path does not start
    with '~', or the home directory cannot be determined (unknown user,
    no password database entry, no pwd module), the path is returned
    unchanged."""
    if path[:1] != '~':
        return path
    # i is the index of the first '/' after the '~', or the end of path.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            try:
                import pwd
                userhome = pwd.getpwuid(os.getuid())[5]
            except (ImportError, KeyError):
                # No pwd module, or the uid has no passwd entry.
                return path
    else:
        try:
            import pwd
            pwent = pwd.getpwnam(path[1:i])
        except (ImportError, KeyError):
            return path
        userhome = pwent[5]
    # Avoid a doubled slash when the home directory ends in one.
    if userhome[-1:] == '/':
        i = i + 1
    return userhome + path[i:]
Guido van Rossum4732ccf1992-08-09 13:54:50 +0000310
311
312# Expand paths containing shell variable substitutions.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000313# This expands the forms $variable and ${variable} only.
Jeremy Hyltona05e2932000-06-28 14:48:01 +0000314# Non-existent variables are left unchanged.
Guido van Rossumb6775db1994-08-01 11:34:53 +0000315
# Compiled pattern used by expandvars(); built on first use.
_varprog = None

def expandvars(path):
    """Expand shell variables of the forms $var and ${var}.

    Values are taken from os.environ.  References to variables that are
    not set are left in the path unchanged, and substituted values are
    not scanned again for further references."""
    global _varprog
    if '$' not in path:
        return path
    if not _varprog:
        import re
        _varprog = re.compile(r'\$(\w+|\{[^}]*\})')

    def substitute(match):
        # Return the variable's value, or the original text if unset.
        name = match.group(1)
        if name[:1] == '{' and name[-1:] == '}':
            name = name[1:-1]
        if name in os.environ:
            return os.environ[name]
        return match.group(0)

    return _varprog.sub(substitute, path)
Guido van Rossumc629d341992-11-05 10:43:02 +0000344
345
346# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
347# It should be understood that this may change the meaning of the path
348# if it contains symbolic links!
349
def normpath(path):
    """Normalize a path: collapse repeated slashes, '.' and '..' parts.

    An empty path, or one that normalizes to nothing, becomes '.'.
    Exactly two leading slashes are kept; one, or three or more, become
    a single slash.  A '..' directly under the root is dropped, while
    leading '..' parts of a relative path are kept."""
    if path == '':
        return '.'
    # Number of leading slashes to emit (0, 1 or 2).  POSIX allows one
    # or two initial slashes, but treats three or more as a single one.
    if not path.startswith('/'):
        lead = 0
    elif path.startswith('//') and not path.startswith('///'):
        lead = 2
    else:
        lead = 1
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept and kept[-1] != '..':
            # '..' cancels the preceding ordinary component.
            kept.pop()
        elif kept or not lead:
            # Nothing to cancel in a relative path: keep the '..'.
            kept.append(part)
        # else: '..' at the root of an absolute path is discarded.
    result = '/'.join(kept)
    if lead:
        result = '/' * lead + result
    return result or '.'
Guido van Rossume294cf61999-01-29 18:05:18 +0000375
376
def abspath(path):
    """Return a normalized, absolute version of path.

    A relative path is joined onto os.getcwd() first; the result is then
    passed through normpath()."""
    if isabs(path):
        return normpath(path)
    return normpath(join(os.getcwd(), path))
Guido van Rossum83eeef42001-09-17 15:16:09 +0000382
383
384# Return a canonical path (i.e. the absolute location of a file on the
385# filesystem).
386
def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
    symbolic links encountered in the path.

    A relative filename is interpreted relative to os.getcwd().  '..'
    components are applied after the preceding components have been
    resolved, so 'link/..' names the parent of the link's target.  If a
    symbolic link loop is encountered, resolution stops at the looping
    link and the rest of the path is appended unresolved."""
    if not isabs(filename):
        filename = join(os.getcwd(), filename)
    resolved = _joinrealpath('', filename, {})[0]
    return abspath(resolved)


def _joinrealpath(path, rest, seen):
    """Resolve rest, component by component, on top of the resolved path.

    seen maps each symlink path met so far to its resolved target, or to
    None while that link is still being resolved.  Returns (newpath, ok);
    ok is False when a symlink loop was detected."""
    if isabs(rest):
        rest = rest[1:]
        path = '/'

    while rest:
        # Peel the next component off the front of rest.
        cut = rest.find('/')
        if cut < 0:
            name, rest = rest, ''
        else:
            name, rest = rest[:cut], rest[cut + 1:]
        if not name or name == '.':
            continue
        if name == '..':
            # path is already resolved, so its parent is simply its
            # head; split('/') leaves the root unchanged.
            path = split(path)[0]
            continue
        newpath = join(path, name)
        if not islink(newpath):
            path = newpath
            continue
        if newpath in seen:
            # Link met before: reuse its target if already known.
            path = seen[newpath]
            if path is not None:
                continue
            # Still being resolved further up the stack: a link loop.
            return join(newpath, rest), False
        seen[newpath] = None    # mark as "in progress"
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            return join(path, rest), False
        seen[newpath] = path    # resolved
    return path, True
Mark Hammond8696ebc2002-10-08 02:44:31 +0000403
# Module-level flag exported through __all__; this module always sets
# it to False.
supports_unicode_filenames = False