blob: d6eca248eb74efe05e82cf4d689f4ca81e400fed [file] [log] [blame]
Guido van Rossumab096c91997-04-02 05:47:11 +00001"""Filename globbing utility."""
Guido van Rossum65a96201991-01-01 18:17:49 +00002
Guido van Rossumbba77af1992-01-12 23:26:24 +00003import os
Guido van Rossum9694fca1997-10-22 21:00:49 +00004import re
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import fnmatch
Guido van Rossum65a96201991-01-01 18:17:49 +00006
Johannes Gijsbers836f5432005-01-08 13:13:19 +00007__all__ = ["glob", "iglob"]
Guido van Rossumbba77af1992-01-12 23:26:24 +00008
def glob(pathname):
    """Collect every path that matches the pattern *pathname* into a list.

    Simple shell-style wildcards are understood, in the manner of
    fnmatch.  The difference from fnmatch is that a filename beginning
    with a dot is a special case: the '*' and '?' patterns do not
    match it.

    """
    matches = iglob(pathname)
    return list(matches)
19
def iglob(pathname):
    """Lazily yield each path that matches the pattern *pathname*.

    Simple shell-style wildcards are understood, in the manner of
    fnmatch.  The difference from fnmatch is that a filename beginning
    with a dot is a special case: the '*' and '?' patterns do not
    match it.

    """
    head, tail = os.path.split(pathname)

    if not has_magic(pathname):
        # No wildcards anywhere: the pattern is a literal path, so it either
        # exists or it does not.  A pattern ending with a slash (empty tail)
        # should match only directories.
        if tail:
            present = os.path.lexists(pathname)
        else:
            present = os.path.isdir(head)
        if present:
            yield pathname
        return

    if not head:
        # Bare pattern with no directory part: let glob1 pick the directory.
        yield from glob1(None, tail)
        return

    # `os.path.split()` returns the argument itself as the directory part if
    # it is a drive or UNC path.  Only recurse when the directory part really
    # is shorter than the input, otherwise a drive or UNC path containing
    # magic characters (i.e. r'\\?\C:') would recurse forever.
    if head != pathname and has_magic(head):
        parents = iglob(head)
    else:
        parents = [head]

    # A wildcard tail needs pattern matching; a literal tail only needs an
    # existence check.
    matcher = glob1 if has_magic(tail) else glob0

    for parent in parents:
        for entry in matcher(parent, tail):
            yield os.path.join(parent, entry)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000056
57# These 2 helper functions non-recursively glob inside a literal directory.
58# They return a list of basenames. `glob1` accepts a pattern while `glob0`
59# takes a literal basename (so it only has to check for its existence).
Guido van Rossum65a96201991-01-01 18:17:49 +000060
def glob1(dirname, pattern):
    """Return the basenames in *dirname* that match *pattern*.

    A false *dirname* stands for the current directory, expressed as
    bytes when *pattern* is bytes and as str otherwise.  If the directory
    cannot be listed (OSError) the result is an empty list.  Hidden
    entries are dropped before matching unless *pattern* is itself hidden.
    """
    if not dirname:
        # Keep the directory the same kind of string as the pattern.
        dirname = (bytes(os.curdir, 'ASCII')
                   if isinstance(pattern, bytes) else os.curdir)
    try:
        entries = os.listdir(dirname)
    except OSError:
        return []
    if _ishidden(pattern):
        candidates = entries
    else:
        candidates = []
        for entry in entries:
            if not _ishidden(entry):
                candidates.append(entry)
    return fnmatch.filter(candidates, pattern)
Guido van Rossum65a96201991-01-01 18:17:49 +000074
def glob0(dirname, basename):
    """Return ``[basename]`` if it exists inside *dirname*, else ``[]``.

    *basename* is taken literally (no pattern matching).  An empty
    *basename* matches only when *dirname* is a directory.
    """
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        found = os.path.isdir(dirname)
    return [basename] if found else []
85
Guido van Rossumc2ef5c21992-01-12 23:32:11 +000086
# Match a single glob metacharacter ('*', '?' or '[').  The capturing group
# lets escape() refer back to the matched character in its replacement.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    # Pick the compiled pattern whose type matches the argument.
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
Hynek Schlawacke26568f2012-12-27 10:10:11 +010096
def _ishidden(path):
    """Return True if *path* (str or bytes) begins with a dot.

    An empty *path* is reported as not hidden instead of raising
    IndexError.
    """
    # Indexing a str gives a one-character str and indexing bytes gives an
    # int, so the candidates are '.' and the integer value of b'.'.  This
    # never compares a str with a bytes object.
    return bool(path) and path[0] in ('.', b'.'[0])
Serhiy Storchakafd32fff2013-11-18 13:06:43 +020099
def escape(pathname):
    """Return *pathname* with all special characters escaped.

    Each of '*', '?' and '[' is wrapped in square brackets.  The drive
    part, as split off by os.path.splitdrive(), is returned unchanged.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped,
    # so set it aside and only rewrite the remainder.
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped