"""Filename globbing utility."""

import os
import re
import fnmatch
import itertools
import stat
import sys

# Public API of this module; everything else is an implementation detail.
__all__ = ["glob", "iglob", "escape"]

def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    root_dir, dir_fd and recursive are forwarded unchanged to iglob();
    this function only materializes the iterator into a list.
    """
    return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                      recursive=recursive))

def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    If root_dir is not None it is converted with os.fspath() and used as
    the directory the pattern is resolved against; otherwise an empty
    str/bytes of the same type as pathname is used.  dir_fd is passed
    through to the helpers that stat or open directories.
    """
    if root_dir is not None:
        root_dir = os.fspath(root_dir)
    else:
        # Empty prefix of the same type (str or bytes) as the pattern.
        root_dir = pathname[:0]
    it = _iglob(pathname, root_dir, dir_fd, recursive, False)
    if not pathname or recursive and _isrecursive(pathname[:2]):
        # In these cases the first produced item may be an empty string,
        # which must not be reported to the caller.
        try:
            s = next(it)  # skip empty string
            if s:
                # It was a real result after all: put it back in front.
                it = itertools.chain((s,), it)
        except StopIteration:
            pass
    return it

def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
    """Yield the paths matching pathname, resolved against root_dir.

    The pattern is split into a directory part and a final component.
    The directory part is expanded first (recursively, with dironly set
    to true) and the final component is then matched inside every
    resulting directory.  Yielded paths are never prefixed with
    root_dir.
    """
    dirname, basename = os.path.split(pathname)
    if not has_magic(pathname):
        # Literal path: nothing to expand, only check that it exists.
        assert not dironly
        if basename:
            if _lexists(_join(root_dir, pathname), dir_fd):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if _isdir(_join(root_dir, dirname), dir_fd):
                yield pathname
        return
    if not dirname:
        # Single-component pattern: match directly inside root_dir.
        if recursive and _isrecursive(basename):
            yield from _glob2(root_dir, basename, dir_fd, dironly)
        else:
            yield from _glob1(root_dir, basename, dir_fd, dironly)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = _iglob(dirname, root_dir, dir_fd, recursive, True)
    else:
        dirs = [dirname]
    # Pick the matcher for the final component once, outside the loop.
    if has_magic(basename):
        if recursive and _isrecursive(basename):
            glob_in_dir = _glob2
        else:
            glob_in_dir = _glob1
    else:
        glob_in_dir = _glob0
    for dirname in dirs:
        for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly):
            yield os.path.join(dirname, name)

# These two helper functions glob non-recursively inside a literal directory.
# They return a list of basenames.  _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence).

def _glob1(dirname, pattern, dir_fd, dironly):
    """Return the basenames inside dirname that match pattern.

    Names starting with a dot are excluded unless the pattern itself
    starts with a dot.
    """
    # Materialize the listing first so the directory iterator is fully
    # consumed before any filtering happens.
    names = list(_iterdir(dirname, dir_fd, dironly))
    if not _ishidden(pattern):
        names = (x for x in names if not _ishidden(x))
    return fnmatch.filter(names, pattern)

def _glob0(dirname, basename, dir_fd, dironly):
    """Return [basename] if the literal basename exists in dirname, else [].

    dironly is accepted for signature parity with the other matchers
    and is not used here.
    """
    if basename:
        if _lexists(_join(dirname, basename), dir_fd):
            return [basename]
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        if _isdir(dirname, dir_fd):
            return [basename]
    return []

# The following functions are not public but can be used by third-party code.

def glob0(dirname, pattern):
    """Return [pattern] if the literal name exists in dirname, else [].

    Thin wrapper around _glob0() with no dir_fd and dironly disabled.
    """
    return _glob0(dirname, pattern, None, False)

def glob1(dirname, pattern):
    """Return the basenames inside dirname that match pattern.

    Thin wrapper around _glob1() with no dir_fd and dironly disabled.
    """
    return _glob1(dirname, pattern, None, False)

# This helper function recursively yields relative pathnames inside a literal
# directory.

def _glob2(dirname, pattern, dir_fd, dironly):
    """Yield an empty name, then every relative path found under dirname.

    pattern must be the recursive wildcard; it is only used to produce
    an empty str/bytes of the matching type as the first item.
    """
    assert _isrecursive(pattern)
    # The empty name stands for dirname itself ("zero directories").
    yield pattern[:0]
    yield from _rlistdir(dirname, dir_fd, dironly)

# If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the entry names found in dirname.

    When dir_fd is given, a non-empty dirname is opened relative to it
    and the resulting descriptor is scanned (and closed afterwards); an
    empty dirname scans dir_fd itself.  Without dir_fd, an empty dirname
    means the current directory.  Any OSError while opening or scanning
    ends the iteration silently.
    """
    try:
        fd = None
        fsencode = None
        if dir_fd is not None:
            if dirname:
                fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
            else:
                arg = dir_fd
            # The scan is done on a descriptor, so names must be encoded
            # explicitly when the caller works with bytes paths.
            if isinstance(dirname, bytes):
                fsencode = os.fsencode
        elif dirname:
            arg = dirname
        elif isinstance(dirname, bytes):
            arg = bytes(os.curdir, 'ASCII')
        else:
            arg = os.curdir
        try:
            with os.scandir(arg) as it:
                for entry in it:
                    try:
                        if not dironly or entry.is_dir():
                            if fsencode is not None:
                                yield fsencode(entry.name)
                            else:
                                yield entry.name
                    except OSError:
                        # Skip entries that cannot be examined.
                        pass
        finally:
            # Only close descriptors opened here, never the caller's dir_fd.
            if fd is not None:
                os.close(fd)
    except OSError:
        return

# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly):
    """Yield every non-hidden name under dirname, descending recursively.

    Each yielded path is relative to dirname.  Names starting with a dot
    are skipped and not descended into.
    """
    # Finish listing this level before recursing into its entries.
    names = list(_iterdir(dirname, dir_fd, dironly))
    for x in names:
        if not _ishidden(x):
            yield x
            path = _join(dirname, x) if dirname else x
            for y in _rlistdir(path, dir_fd, dironly):
                yield _join(x, y)


def _lexists(pathname, dir_fd):
    """Return True if pathname exists, without following a final symlink.

    Same as os.path.lexists(), but with dir_fd: when dir_fd is not None
    the path is checked with os.lstat() relative to that descriptor.
    """
    if dir_fd is None:
        return os.path.lexists(pathname)
    try:
        os.lstat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    else:
        return True

def _isdir(pathname, dir_fd):
    """Return True if pathname refers to a directory.

    Same as os.path.isdir(), but with dir_fd: when dir_fd is not None
    the path is checked with os.stat() relative to that descriptor.
    """
    if dir_fd is None:
        return os.path.isdir(pathname)
    try:
        st = os.stat(pathname, dir_fd=dir_fd)
    except (OSError, ValueError):
        return False
    else:
        return stat.S_ISDIR(st.st_mode)

def _join(dirname, basename):
    """Join two path components, short-circuiting when either is empty."""
    # It is common if dirname or basename is empty
    if not dirname or not basename:
        return dirname or basename
    return os.path.join(dirname, basename)

# Patterns that find any glob metacharacter ('*', '?' or '[') in str and
# bytes paths respectively.  The capturing group lets escape() re-emit it.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')

def has_magic(s):
    """Return True if s (str or bytes) contains '*', '?' or '['."""
    if isinstance(s, bytes):
        match = magic_check_bytes.search(s)
    else:
        match = magic_check.search(s)
    return match is not None

def _ishidden(path):
    """Return True if the non-empty str or bytes name starts with a dot."""
    # Indexing bytes yields an int, hence the second alternative.
    return path[0] in ('.', b'.'[0])

def _isrecursive(pattern):
    """Return True if pattern is exactly the recursive wildcard '**'."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    else:
        return pattern == '**'

def escape(pathname):
    """Escape all special characters.

    Returns pathname (str or bytes) with every '*', '?' and '[' outside
    the drive part wrapped in square brackets.
    """
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(br'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname


# Flags used when opening a directory relative to dir_fd; O_DIRECTORY is
# added only on platforms where the os module defines it.
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)