blob: 9fc08f45df115a0196636e6b3a10d316607b56eb [file] [log] [blame]
Guido van Rossumab096c91997-04-02 05:47:11 +00001"""Filename globbing utility."""
Guido van Rossum65a96201991-01-01 18:17:49 +00002
Miss Islington (bot)38e021a2021-06-23 03:28:08 -07003import contextlib
Guido van Rossumbba77af1992-01-12 23:26:24 +00004import os
Guido van Rossum9694fca1997-10-22 21:00:49 +00005import re
Guido van Rossumd8faa362007-04-27 19:54:29 +00006import fnmatch
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +03007import itertools
8import stat
Steve Dower60419a72019-06-24 08:42:54 -07009import sys
Guido van Rossum65a96201991-01-01 18:17:49 +000010
Serhiy Storchaka04b57002015-11-09 23:18:19 +020011__all__ = ["glob", "iglob", "escape"]
Guido van Rossumbba77af1992-01-12 23:26:24 +000012
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
    """Collect every path matching the pattern *pathname* into a list.

    This is the eager counterpart of iglob(): all arguments are forwarded
    to it unchanged and the iterator it returns is drained into a list.

    The pattern may use the shell-style wildcards understood by fnmatch.
    Unlike fnmatch, filenames starting with a dot are special cases that
    are not matched by '*' and '?' patterns.

    If *recursive* is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                    recursive=recursive)
    return list(matches)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000025
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
    """Return an iterator over the paths matching the pattern *pathname*.

    The pattern may use the shell-style wildcards understood by fnmatch.
    Unlike fnmatch, filenames starting with a dot are special cases that
    are not matched by '*' and '?' patterns.

    If *recursive* is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    The audit events "glob.glob" and "glob.glob/2" are raised on every
    call, before *root_dir* is normalised.
    """
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)

    # An empty slice of pathname gives '' or b'' to match the pattern's type.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)
    results = _iglob(pathname, root_dir, dir_fd, recursive, False)

    # Only an empty pattern, or a recursive pattern starting with '**', can
    # produce a leading empty string that has to be dropped.
    may_lead_with_empty = (
        not pathname or (recursive and _isrecursive(pathname[:2]))
    )
    if not may_lead_with_empty:
        return results

    try:
        first = next(results)
    except StopIteration:
        return results
    if first:
        # The first item was a real match after all: put it back in front.
        results = itertools.chain((first,), results)
    return results
52
def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
    """Yield the paths matching *pathname*, resolved against *root_dir*.

    *pathname* is split into a directory part and a final component.  The
    directory part is expanded first (recursively, when it contains magic
    characters) and the final component is then matched inside each
    resulting directory.  *dironly* is handed to the directory helpers so
    they can restrict themselves to directories.
    """
    head, tail = os.path.split(pathname)

    if not has_magic(pathname):
        # Purely literal pattern: a single existence check is enough.
        assert not dironly
        if tail:
            found = _lexists(_join(root_dir, pathname), dir_fd)
        else:
            # Patterns ending with a slash should match only directories
            found = _isdir(_join(root_dir, head), dir_fd)
        if found:
            yield pathname
        return

    wants_recursion = recursive and _isrecursive(tail)

    if not head:
        # No directory part: match directly inside root_dir.
        lister = _glob2 if wants_recursion else _glob1
        yield from lister(root_dir, tail, dir_fd, dironly)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, root_dir, dir_fd, recursive, True)
    else:
        parents = [head]

    if not has_magic(tail):
        glob_in_dir = _glob0
    elif wants_recursion:
        glob_in_dir = _glob2
    else:
        glob_in_dir = _glob1

    for parent in parents:
        matches = glob_in_dir(_join(root_dir, parent), tail, dir_fd, dironly)
        for name in matches:
            yield os.path.join(parent, name)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000088
89# These 2 helper functions non-recursively glob inside a literal directory.
Serhiy Storchaka28ab6342016-09-06 22:33:41 +030090# They return a list of basenames. _glob1 accepts a pattern while _glob0
Johannes Gijsbers836f5432005-01-08 13:13:19 +000091# takes a literal basename (so it only has to check for its existence).
Guido van Rossum65a96201991-01-01 18:17:49 +000092
def _glob1(dirname, pattern, dir_fd, dironly):
    """Return the entries of directory *dirname* that match *pattern*.

    Entries whose name starts with a dot are considered only when the
    pattern itself starts with a dot.
    """
    entries = _listdir(dirname, dir_fd, dironly)
    if _ishidden(pattern):
        candidates = entries
    else:
        # Lazily drop dot-files; fnmatch.filter() consumes the iterator.
        candidates = filter(lambda entry: not _ishidden(entry), entries)
    return fnmatch.filter(candidates, pattern)
Guido van Rossum65a96201991-01-01 18:17:49 +000098
def _glob0(dirname, basename, dir_fd, dironly):
    """Return [basename] if the literal *basename* exists in *dirname*.

    An empty list is returned when there is no match.  *dironly* is
    accepted for signature parity with the other helpers and is not used.
    """
    if basename:
        present = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        present = _isdir(dirname, dir_fd)
    return [basename] if present else []
109
# The following functions are not public but can be used by third-party code.
111
def glob0(dirname, pattern):
    """Call _glob0() for a literal name, with no dir_fd and dironly off."""
    return _glob0(dirname, pattern, dir_fd=None, dironly=False)
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300114
def glob1(dirname, pattern):
    """Call _glob1() for a pattern, with no dir_fd and dironly off."""
    return _glob1(dirname, pattern, dir_fd=None, dironly=False)
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300117
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300118# This helper function recursively yields relative pathnames inside a literal
119# directory.
120
def _glob2(dirname, pattern, dir_fd, dironly):
    """Yield an empty name, then every relative path found below *dirname*.

    *pattern* must be the recursive wildcard.  The leading empty item has
    the same type (str or bytes) as the pattern.
    """
    assert _isrecursive(pattern)
    empty_name = pattern[:0]
    yield empty_name
    yield from _rlistdir(dirname, dir_fd, dironly)
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300125
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300126# If dironly is false, yields all file names inside a directory.
127# If dironly is true, yields only directory names.
def _iterdir(dirname, dir_fd, dironly):
    """Yield the entry names found in directory *dirname*.

    With *dironly* true, only entries for which is_dir() is true are
    produced.  When *dir_fd* is given, *dirname* is opened relative to it
    (or *dir_fd* itself is scanned if *dirname* is empty), and names are
    converted with os.fsencode() if *dirname* is bytes.  Any OSError ends
    or skips the iteration silently instead of propagating.
    """
    try:
        opened_fd = None
        convert = None
        if dir_fd is None:
            if dirname:
                target = dirname
            elif isinstance(dirname, bytes):
                # Scan the current directory, keeping the bytes flavour.
                target = bytes(os.curdir, 'ASCII')
            else:
                target = os.curdir
        else:
            if dirname:
                opened_fd = target = os.open(dirname, _dir_open_flags,
                                             dir_fd=dir_fd)
            else:
                target = dir_fd
            if isinstance(dirname, bytes):
                convert = os.fsencode
        try:
            with os.scandir(target) as entries:
                for entry in entries:
                    try:
                        if dironly and not entry.is_dir():
                            continue
                        name = entry.name
                        yield (name if convert is None else convert(name))
                    except OSError:
                        pass
        finally:
            # Only close a descriptor this function opened itself.
            if opened_fd is not None:
                os.close(opened_fd)
    except OSError:
        return
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300161
def _listdir(dirname, dir_fd, dironly):
    """Return everything _iterdir() yields as a list, then close it."""
    entries = _iterdir(dirname, dir_fd, dironly)
    try:
        return list(entries)
    finally:
        # Close the generator explicitly so its cleanup runs right away.
        entries.close()
165
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300166# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly):
    """Yield relative paths of all non-hidden entries below *dirname*.

    Each entry is yielded first, followed by everything found beneath it
    with the entry's name joined in front.  Entries whose name starts
    with a dot are neither yielded nor descended into.
    """
    for name in _listdir(dirname, dir_fd, dironly):
        if _ishidden(name):
            continue
        yield name
        child = _join(dirname, name) if dirname else name
        for descendant in _rlistdir(child, dir_fd, dironly):
            yield _join(name, descendant)
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300175
Guido van Rossumc2ef5c21992-01-12 23:32:11 +0000176
def _lexists(pathname, dir_fd):
    """Same as os.path.lexists(), but with optional *dir_fd* support.

    With a *dir_fd*, the path is checked with os.lstat() relative to that
    descriptor; an OSError or ValueError means the path does not exist.
    """
    if dir_fd is not None:
        try:
            os.lstat(pathname, dir_fd=dir_fd)
        except (OSError, ValueError):
            return False
        return True
    return os.path.lexists(pathname)
187
def _isdir(pathname, dir_fd):
    """Same as os.path.isdir(), but with optional *dir_fd* support.

    With a *dir_fd*, the path is checked with os.stat() relative to that
    descriptor; an OSError or ValueError means it is not a directory.
    """
    if dir_fd is not None:
        try:
            info = os.stat(pathname, dir_fd=dir_fd)
        except (OSError, ValueError):
            return False
        return stat.S_ISDIR(info.st_mode)
    return os.path.isdir(pathname)
198
def _join(dirname, basename):
    """Join two path parts, short-circuiting when either one is empty.

    It is common for dirname or basename to be empty; in that case the
    other part is returned as is instead of calling os.path.join().
    """
    if not dirname:
        return basename
    if not basename:
        return dirname
    return os.path.join(dirname, basename)
204
# Patterns locating the glob metacharacters '*', '?' and '[' in str and
# bytes input respectively.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
Hynek Schlawacke26568f2012-12-27 10:10:11 +0100214
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is not hidden and yields False.  Indexing it directly
    would raise IndexError, which callers such as glob1() with an empty
    pattern would otherwise hit.
    """
    if not path:
        return False
    # Indexing str gives a one-character str, indexing bytes gives an int,
    # so the candidates are '.' and the integer value of b'.'.
    return path[0] in ('.', b'.'[0])
Serhiy Storchakafd32fff2013-11-18 13:06:43 +0200217
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard '**'.

    The comparison uses the literal of the same type (str or bytes) as
    *pattern*.
    """
    wildcard = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == wildcard
223
def escape(pathname):
    """Escape all special characters in *pathname*.

    Each of the characters '*', '?' and '[' is wrapped in square
    brackets.  The drive part reported by os.path.splitdrive() is left
    untouched, because metacharacters do not work there and should not
    be escaped.
    """
    drive, remainder = os.path.splitdrive(pathname)
    if isinstance(remainder, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', remainder)
    else:
        escaped = magic_check.sub(r'[\1]', remainder)
    return drive + escaped
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +0300235
236
# Flags passed to os.open() when a directory is opened relative to dir_fd.
# O_DIRECTORY is OR-ed in only where the os module defines it.
_dir_open_flags = getattr(os, 'O_DIRECTORY', 0) | os.O_RDONLY