blob: d4fc13f4ff3ab1098af93231ff758b84d68682af [file] [log] [blame]
"""Common operations on Posix pathnames.

Instead of importing this module directly, import os and refer to
this module as os.path.  The "os.path" name is an alias for this
module on Posix systems; on other systems (e.g. Mac, Windows),
os.path provides the same operations in a manner specific to that
platform, and is an alias to another module (e.g. macpath, ntpath).

Some of this can actually be useful on non-Posix systems too, e.g.
for manipulation of the pathname component of URLs.
"""
12
13import os
14import stat
15
# Public names exported by "from <module> import *".  This must stay a
# mutable list: further names are added to it later in the module when the
# platform supports them.
__all__ = [
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix",
    "getsize", "getmtime", "getatime", "getctime",
    "islink", "exists", "lexists", "isdir", "isfile",
    "walk", "expanduser", "expandvars", "normpath", "abspath",
    "samefile",
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull", "realpath", "supports_unicode_filenames",
]
23
# Strings representing various path-related bits and pieces.
curdir = '.'                # path component naming the current directory
pardir = '..'               # path component naming the parent directory
extsep = '.'                # separates a file name from its extension
sep = '/'                   # separates path components
pathsep = ':'               # separates entries in a search path
defpath = ':/bin:/usr/bin'  # default search path
altsep = None               # no alternative component separator here
devnull = '/dev/null'       # path of the null device
33
# Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
# (another function should be defined to do that).
38
def normcase(s):
    """Return the pathname *s* unchanged.

    Posix pathnames are case-sensitive, so there is nothing to normalize.
    """
    return s
42
43
# Return whether a path is absolute.
# Trivial in Posix, harder on the Mac or MS-DOS.
46
def isabs(s):
    """Return True if the pathname *s* is absolute, i.e. begins with '/'."""
    starts_at_root = s.startswith('/')
    return starts_at_root
50
51
# Join pathnames.
# Ignore the previous parts if a part is absolute.
# Insert a '/' unless the first part is empty or already ends in '/'.
55
def join(a, *p):
    """Join pathname components, inserting '/' between them as needed.

    Starts from *a* and folds in each component of *p* in order.  A
    component that begins with '/' discards everything joined so far.  No
    separator is inserted when the accumulated path is empty or already
    ends with '/'.
    """
    joined = a
    for part in p:
        if part.startswith('/'):
            # An absolute component restarts the result.
            joined = part
            continue
        if joined != '' and not joined.endswith('/'):
            part = '/' + part
        joined += part
    return joined
67
68
# Split a path in head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
73
def split(p):
    """Split pathname *p* into a ``(head, tail)`` tuple.

    *tail* is everything after the final '/', and *head* is everything up
    to and including it, with trailing slashes removed unless *head*
    consists only of slashes.  Either part may be empty.
    """
    cut = p.rfind('/') + 1
    head = p[:cut]
    tail = p[cut:]
    # rstrip() yields '' both for an empty head and for an all-slash head;
    # in those two cases the head is kept exactly as it is.
    trimmed = head.rstrip('/')
    if trimmed:
        head = trimmed
    return head, tail
82
83
# Split a path in root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.
88
def splitext(p):
    """Split the extension from a pathname.

    Returns ``(root, ext)`` such that ``root + ext == p``.  *ext* is either
    empty or starts with the last '.' of the final path component and
    contains no other dot.  Leading dots of the final component do not
    start an extension, so names such as '.bashrc' or '..' have an empty
    extension.
    """
    slash = p.rfind('/')
    dot = p.rfind('.')
    if dot > slash:
        # The last dot is inside the final component.  It only starts an
        # extension when some non-dot character precedes it there.
        for ch in p[slash + 1:dot]:
            if ch != '.':
                return p[:dot], p[dot:]
    return p, ''
97
98
# Split a pathname into a drive specification and the rest of the
# path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
101
def splitdrive(p):
    """Split pathname *p* into a ``(drive, path)`` tuple.

    The drive part is always the empty string here, so the second element
    is *p* itself.
    """
    drive = ''
    return drive, p
106
107
# Return the tail (basename) part of a path.
109
def basename(p):
    """Return the final component of pathname *p*.

    This is everything after the last '/', and is empty when *p* ends with
    a slash.
    """
    # rfind() returns -1 when there is no slash, so the slice then starts
    # at 0 and yields the whole string.
    start = p.rfind('/') + 1
    return p[start:]
113
114
# Return the head (dirname) part of a path.
116
def dirname(p):
    """Return the directory component of pathname *p*.

    This is everything up to the last '/', with trailing slashes removed
    unless the result would consist only of slashes.  It is empty when *p*
    contains no slash.
    """
    head = p[:p.rfind('/') + 1]
    # An empty or all-slash head strips to '' and is returned untouched.
    return head.rstrip('/') or head
120
121
# Return the longest prefix of all list elements.
123
def commonprefix(m):
    """Return the longest string that is a prefix of every item in *m*.

    The comparison is made character by character, not path component by
    path component, so the result is not necessarily a valid path.  An
    empty *m* yields ''.
    """
    if not m:
        return ''
    # Any prefix shared by the smallest and largest items (in sort order)
    # is shared by everything in between, so only those two are compared.
    s1 = min(m)
    s2 = max(m)
    # s1 <= s2, so if s1 were longer than s2 a mismatch must occur within
    # s2's length; s2[i] therefore never goes out of range.
    for i, c in enumerate(s1):
        if c != s2[i]:
            return s1[:i]
    return s1
134
# Get size, mtime, atime, ctime of files.
136
def getsize(filename):
    """Return the ``st_size`` field reported by os.stat() for *filename*."""
    info = os.stat(filename)
    return info.st_size
140
def getmtime(filename):
    """Return the ``st_mtime`` field (last modification time) reported by
    os.stat() for *filename*."""
    info = os.stat(filename)
    return info.st_mtime
144
def getatime(filename):
    """Return the ``st_atime`` field (last access time) reported by
    os.stat() for *filename*."""
    info = os.stat(filename)
    return info.st_atime
148
def getctime(filename):
    """Return the ``st_ctime`` field (metadata change time) reported by
    os.stat() for *filename*."""
    info = os.stat(filename)
    return info.st_ctime
152
# Is a path a symbolic link?
# This will always return false on systems where os.lstat doesn't exist.
155
def islink(path):
    """Return whether *path* is a symbolic link.

    Returns False when os.lstat() fails for *path*, or when os.lstat is
    not available at all.
    """
    try:
        link_info = os.lstat(path)
    except (os.error, AttributeError):
        is_link = False
    else:
        is_link = stat.S_ISLNK(link_info.st_mode)
    return is_link
163
164
# Does a path exist?
# This is false for dangling symbolic links.
167
def exists(path):
    """Return True if os.stat() succeeds for *path*, False otherwise.

    Because os.stat() follows symbolic links, a broken link gives False.
    """
    try:
        os.stat(path)
    except os.error:
        return False
    else:
        return True
175
176
# Being true for dangling symbolic links is also useful.
178
def lexists(path):
    """Return True if os.lstat() succeeds for *path*, False otherwise.

    Because os.lstat() does not follow symbolic links, a broken link gives
    True.
    """
    try:
        os.lstat(path)
    except os.error:
        return False
    else:
        return True
186
187
# Is a path a directory?
# This follows symbolic links, so both islink() and isdir() can be true
# for the same path.
191
def isdir(path):
    """Return whether *path* is a directory.

    Uses os.stat(), so symbolic links are followed.  Returns False when
    os.stat() fails for *path*.
    """
    try:
        info = os.stat(path)
    except os.error:
        is_directory = False
    else:
        is_directory = stat.S_ISDIR(info.st_mode)
    return is_directory
199
200
# Is a path a regular file?
# This follows symbolic links, so both islink() and isfile() can be true
# for the same path.
204
def isfile(path):
    """Return whether *path* is a regular file.

    Uses os.stat(), so symbolic links are followed.  Returns False when
    os.stat() fails for *path*.
    """
    try:
        info = os.stat(path)
    except os.error:
        is_regular = False
    else:
        is_regular = stat.S_ISREG(info.st_mode)
    return is_regular
212
213
# Are two filenames really pointing to the same file?
215
if os._native_posix:
    def samefile(f1, f2):
        """Test whether two pathnames reference the same actual file.

        Both paths are passed to os.stat() and the two results are
        compared with samestat().
        """
        first = os.stat(f1)
        second = os.stat(f2)
        return samestat(first, second)
else:
    # Without native posix support the comparison is done on the
    # canonical paths computed by java.io.File.
    import java.io.File
    import java.io.IOException
    from org.python.core.Py import newString

    def samefile(f1, f2):
        """Test whether two pathnames reference the same actual file.

        Both arguments must be strings (TypeError is raised by
        _ensure_str() otherwise); they are considered the same file when
        their canonical paths are equal.
        """
        def canonical(name):
            # Canonical path of name as a Python string.
            return newString(java.io.File(_ensure_str(name)).getCanonicalPath())

        canon1 = canonical(f1)
        canon2 = canonical(f2)
        return canon1 == canon2
232
233
# XXX: Jython currently lacks fstat
if hasattr(os, 'fstat'):
    # Are two open files really referencing the same file?
    # (Not necessarily the same file descriptor!)

    def sameopenfile(fp1, fp2):
        """Test whether two open files reference the same file.

        *fp1* and *fp2* are handed directly to os.fstat(); the two results
        are compared with samestat().
        """
        # NOTE(review): samestat() is only defined when os._native_posix is
        # true -- confirm os.fstat is never present without it.
        first = os.fstat(fp1)
        second = os.fstat(fp2)
        return samestat(first, second)

    # Only advertise the function where it is actually defined.
    __all__.append("sameopenfile")
246
247
# XXX: Pure Java stat lacks st_ino/st_dev
if os._native_posix:
    # Are two stat buffers (obtained from stat, fstat or lstat)
    # describing the same file?

    def samestat(s1, s2):
        """Test whether two stat buffers reference the same file.

        They do when both the inode number (st_ino) and the device
        (st_dev) are equal.
        """
        return (s1.st_ino == s2.st_ino and
                s1.st_dev == s2.st_dev)


    # Is a path a mount point?
    # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)

    def ismount(path):
        """Test whether a path is a mount point.

        Compares os.lstat() of *path* with os.lstat() of ``path/..``.
        Returns False if either call fails.
        """
        try:
            here = os.lstat(path)
            parent = os.lstat(join(path, '..'))
        except os.error:
            return False  # It doesn't exist -- so not a mount point :-)
        if here.st_dev != parent.st_dev:
            return True   # path/.. is on a different device than path
        # path/.. being the same i-node as path also marks a mount point.
        return here.st_ino == parent.st_ino

    __all__.extend(["samestat", "ismount"])
280
281
# Directory tree walk.
# For each directory under top (including top itself, but excluding
# '.' and '..'), func(arg, dirname, filenames) is called, where
# dirname is the name of the directory and filenames is the list
# of files (and subdirectories etc.) in the directory.
# The func may modify the filenames list, to implement a filter,
# or to impose a different order of visiting.
289
def walk(top, func, arg):
    """Directory tree walk with callback function.

    For each directory in the tree rooted at *top* (including *top*
    itself), call ``func(arg, dirname, fnames)``, where *dirname* is the
    directory's name and *fnames* is the list returned by os.listdir() for
    it.  *func* may modify *fnames* in place (e.g. via del or slice
    assignment); only the names still in the list afterwards are examined
    for recursion, which allows filtering or reordering the visit.  *arg*
    is passed through to *func* untouched; passing None is common.

    Directories that cannot be listed and entries that cannot be
    lstat()ed are silently skipped.  Because os.lstat() is used, symbolic
    links to directories are not followed.
    """
    try:
        names = os.listdir(top)
    except os.error:
        return
    func(arg, top, names)
    # Iterate over the list *after* the callback has had a chance to edit it.
    for name in names:
        child = join(top, name)
        try:
            child_info = os.lstat(child)
        except os.error:
            continue
        if stat.S_ISDIR(child_info.st_mode):
            walk(child, func, arg)
318
319
# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)
328
def expanduser(path):
    """Expand a leading '~' in *path* to the value of $HOME.

    The path is returned unchanged when it does not start with '~', when
    $HOME is not set, or when it uses the '~user' form (which is not
    supported here).  Trailing slashes on $HOME are dropped before the
    rest of the path is appended; if that leaves nothing at all (e.g.
    $HOME is '/' and *path* is '~'), '/' is returned rather than an empty
    string.
    """
    if not path.startswith('~'):
        return path
    # i is the index just past the '~' or '~user' prefix.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i != 1:
        # XXX: Jython lacks the pwd module: '~user' isn't supported
        return path
    if 'HOME' not in os.environ:
        return path
    userhome = os.environ['HOME'].rstrip('/')
    # A home directory of '/' strips down to ''; never hand back an empty
    # path for it.
    return (userhome + path[i:]) or '/'
347
348
# Expand paths containing shell variable substitutions.
# This expands the forms $variable and ${variable} only.
# Non-existent variables are left unchanged.
352
# Compiled pattern used by expandvars(); built on first use.
_varprog = None

def expandvars(path):
    """Expand shell variables of the forms $var and ${var} in *path*.

    Values are looked up in os.environ.  References to variables that are
    not set are left in place.  Substituted values are not rescanned for
    further references.
    """
    global _varprog
    if '$' not in path:
        return path
    if not _varprog:
        # Compile lazily so that 're' is only imported when needed.
        import re
        _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
    pos = 0
    match = _varprog.search(path, pos)
    while match:
        start, end = match.span(0)
        name = match.group(1)
        if name.startswith('{') and name.endswith('}'):
            name = name[1:-1]
        if name in os.environ:
            expanded = path[:start] + os.environ[name]
            path = expanded + path[end:]
            # Resume just past the inserted value.
            pos = len(expanded)
        else:
            # Unknown variable: leave it and continue after it.
            pos = end
        match = _varprog.search(path, pos)
    return path
381
382
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
# It should be understood that this may change the meaning of the path
# if it contains symbolic links!
386
def normpath(path):
    """Normalize *path*, collapsing redundant slashes and '.'/'..' parts.

    An empty path, or one that normalizes to nothing, becomes '.'.  The
    work is purely textual; the filesystem is never consulted.
    """
    if path == '':
        return '.'
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if not path.startswith('/'):
        lead = 0
    elif path.startswith('//') and not path.startswith('///'):
        lead = 2
    else:
        lead = 1
    kept = []
    for part in path.split('/'):
        if part == '' or part == '.':
            continue
        if part != '..':
            kept.append(part)
        elif not kept:
            # Nothing to cancel: keep '..' on a relative path, drop it on
            # an absolute one.
            if not lead:
                kept.append(part)
        elif kept[-1] == '..':
            # Runs of leading '..' accumulate.
            kept.append(part)
        else:
            kept.pop()
    result = '/'.join(kept)
    if lead:
        result = '/' * lead + result
    return result or '.'
411 return path or '.'
412
413
def abspath(path):
    """Return a normalized, absolute version of *path*.

    A relative *path* is first joined onto os.getcwd(); the result is then
    passed through normpath().
    """
    if isabs(path):
        return normpath(path)
    return normpath(join(os.getcwd(), path))
419
420
# Return a canonical path (i.e. the absolute location of a file on the
# filesystem).
423
def realpath(filename):
    """Return the canonical path of *filename*, eliminating any symbolic
    links encountered in the path.

    Successively longer prefixes of the path are checked with islink().
    The first prefix that is a link is resolved with _resolve_link(), the
    remaining components are appended to the result, and the whole process
    restarts on that new path.  If the link cannot be resolved,
    the unresolved prefix plus the remaining components is returned in
    absolute form.
    """
    parts = filename.split('/')
    if isabs(filename):
        # The split leaves '' in front of the leading slash; use '/' so
        # that join() rebuilds an absolute prefix.
        parts[0] = '/'
    else:
        parts.insert(0, '')

    for end in range(2, len(parts) + 1):
        prefix = join(*parts[:end])
        if not islink(prefix):
            continue
        # Resolve symbolic links.
        remainder = parts[end:]
        target = _resolve_link(prefix)
        if target is None:
            # Infinite loop -- return original component + rest of the path
            return abspath(join(prefix, *remainder))
        return realpath(join(target, *remainder))

    return abspath(filename)
445
446
if os._native_posix:
    def _resolve_link(path):
        """Internal helper function.  Takes a path and follows symlinks
        until we either arrive at something that isn't a symlink, or
        encounter a path we've seen before (meaning that there's a loop).

        Returns the final path, or None when a loop is detected.
        """
        visited = []
        while islink(path):
            if path in visited:
                # Already seen this path, so we must have a symlink loop
                return None
            visited.append(path)
            # Resolve where the link points to
            target = os.readlink(path)
            if isabs(target):
                path = normpath(target)
            else:
                # A relative target is relative to the link's directory.
                path = normpath(join(dirname(path), target))
        return path
else:
    def _resolve_link(path):
        """Internal helper function.  Returns the canonical path that
        java.io.File reports for the absolute form of *path*, or None if
        that lookup raises java.io.IOException.
        """
        try:
            return newString(java.io.File(abspath(path)).getCanonicalPath())
        except java.io.IOException:
            return None
477
478
def _ensure_str(obj):
    """Return *obj* if it is a string (basestring instance).

    Raises TypeError, naming the offending type, for anything else.
    """
    if not isinstance(obj, basestring):
        raise TypeError('coercing to Unicode: need string or buffer, %s found' %
                        _type_name(obj))
    return obj
485
486
def _type_name(obj):
    """Return the name of *obj*'s type for use in messages.

    The bare type name is used for heap types and for types whose module
    is '__builtin__'; any other type is shown as ``module.name``.
    """
    TPFLAGS_HEAPTYPE = 1 << 9
    obj_type = type(obj)
    is_heap = (obj_type.__flags__ & TPFLAGS_HEAPTYPE) == TPFLAGS_HEAPTYPE
    if is_heap or obj_type.__module__ == '__builtin__':
        return obj_type.__name__
    return '%s.%s' % (obj_type.__module__, obj_type.__name__)
497
# Flag exported through __all__; it is False in this module.
supports_unicode_filenames = False