blob: a6cff873508266c61ca6eeed4d70f2c3e56a65aa [file] [log] [blame]
Guido van Rossumab096c91997-04-02 05:47:11 +00001"""Filename globbing utility."""
Guido van Rossum65a96201991-01-01 18:17:49 +00002
Guido van Rossumbba77af1992-01-12 23:26:24 +00003import os
Guido van Rossum9694fca1997-10-22 21:00:49 +00004import re
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import fnmatch
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +03006import itertools
7import stat
Steve Dower60419a72019-06-24 08:42:54 -07008import sys
Guido van Rossum65a96201991-01-01 18:17:49 +00009
Serhiy Storchaka04b57002015-11-09 23:18:19 +020010__all__ = ["glob", "iglob", "escape"]
Guido van Rossumbba77af1992-01-12 23:26:24 +000011
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
    """Return a list of the paths that match a pathname pattern.

    The pattern may use simple shell-style wildcards in the manner of
    fnmatch.  Unlike fnmatch, however, a filename that begins with a
    dot is a special case: it is not matched by the '*' and '?'
    patterns.

    When recursive is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    # iglob() does the work lazily; collect everything it yields.
    matches = iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                    recursive=recursive)
    return list(matches)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000024
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
    """Return an iterator over the paths that match a pathname pattern.

    The pattern may use simple shell-style wildcards in the manner of
    fnmatch.  Unlike fnmatch, however, a filename that begins with a
    dot is a special case: it is not matched by the '*' and '?'
    patterns.

    When recursive is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    # Both audit events are raised at call time, before any matching.
    sys.audit("glob.glob", pathname, recursive)
    sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd)
    # With no root_dir, use an empty string of the same type (str or
    # bytes) as pathname.
    root_dir = pathname[:0] if root_dir is None else os.fspath(root_dir)
    results = _iglob(pathname, root_dir, dir_fd, recursive, False)
    if not pathname or (recursive and _isrecursive(pathname[:2])):
        try:
            first = next(results)  # skip empty string
        except StopIteration:
            return results
        if first:
            # The first result was a real match after all; put it back.
            results = itertools.chain((first,), results)
    return results
51
def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
    """Yield the paths matching pathname, resolved against root_dir.

    pathname is split into a directory part and a final component.  The
    directory part is expanded first (recursively, if it contains magic
    characters), then the final component is matched inside every
    resulting directory.  dironly is passed through to the helper that
    matches the final component.
    """
    head, tail = os.path.split(pathname)
    if not has_magic(pathname):
        assert not dironly
        if tail:
            found = _lexists(_join(root_dir, pathname), dir_fd)
        else:
            # Patterns ending with a slash should match only directories
            found = _isdir(_join(root_dir, head), dir_fd)
        if found:
            yield pathname
        return
    if not head:
        # No directory part: match directly inside root_dir.
        lister = _glob2 if recursive and _isrecursive(tail) else _glob1
        yield from lister(root_dir, tail, dir_fd, dironly)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, root_dir, dir_fd, recursive, True)
    else:
        parents = [head]
    # Choose the helper that matches the final component.
    if not has_magic(tail):
        glob_in_dir = _glob0
    elif recursive and _isrecursive(tail):
        glob_in_dir = _glob2
    else:
        glob_in_dir = _glob1
    for parent in parents:
        for name in glob_in_dir(_join(root_dir, parent), tail, dir_fd, dironly):
            yield os.path.join(parent, name)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000087
88# These 2 helper functions non-recursively glob inside a literal directory.
Serhiy Storchaka28ab6342016-09-06 22:33:41 +030089# They return a list of basenames. _glob1 accepts a pattern while _glob0
Johannes Gijsbers836f5432005-01-08 13:13:19 +000090# takes a literal basename (so it only has to check for its existence).
Guido van Rossum65a96201991-01-01 18:17:49 +000091
def _glob1(dirname, pattern, dir_fd, dironly):
    """Return the names inside dirname that match the wildcard pattern.

    Names starting with a dot are considered only when the pattern
    itself starts with a dot.
    """
    candidates = list(_iterdir(dirname, dir_fd, dironly))
    if _ishidden(pattern):
        visible = candidates
    else:
        visible = [name for name in candidates if not _ishidden(name)]
    return fnmatch.filter(visible, pattern)
Guido van Rossum65a96201991-01-01 18:17:49 +000097
def _glob0(dirname, basename, dir_fd, dironly):
    """Return [basename] if that literal name exists in dirname, else [].

    The dironly argument is accepted so the signature matches the other
    glob helpers; it is not used here.
    """
    if basename:
        present = _lexists(_join(dirname, basename), dir_fd)
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        present = _isdir(dirname, dir_fd)
    return [basename] if present else []
108
# The following functions are not public but can be used by third-party code.
110
def glob0(dirname, pattern):
    """Call _glob0() with no dir_fd and with dironly false."""
    return _glob0(dirname, pattern, dir_fd=None, dironly=False)
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300113
def glob1(dirname, pattern):
    """Call _glob1() with no dir_fd and with dironly false."""
    return _glob1(dirname, pattern, dir_fd=None, dironly=False)
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300116
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300117# This helper function recursively yields relative pathnames inside a literal
118# directory.
119
def _glob2(dirname, pattern, dir_fd, dironly):
    """Yield an empty name, then every relative path found under dirname.

    pattern must be the recursive wildcard; only its type (str or
    bytes) is used, to build the leading empty name.
    """
    assert _isrecursive(pattern)
    yield pattern[:0]
    for relative in _rlistdir(dirname, dir_fd, dironly):
        yield relative
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300124
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300125# If dironly is false, yields all file names inside a directory.
126# If dironly is true, yields only directory names.
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +0300127def _iterdir(dirname, dir_fd, dironly):
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300128 try:
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +0300129 fd = None
130 fsencode = None
131 if dir_fd is not None:
132 if dirname:
133 fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd)
134 else:
135 arg = dir_fd
136 if isinstance(dirname, bytes):
137 fsencode = os.fsencode
138 elif dirname:
139 arg = dirname
140 elif isinstance(dirname, bytes):
141 arg = bytes(os.curdir, 'ASCII')
142 else:
143 arg = os.curdir
144 try:
145 with os.scandir(arg) as it:
146 for entry in it:
147 try:
148 if not dironly or entry.is_dir():
149 if fsencode is not None:
150 yield fsencode(entry.name)
151 else:
152 yield entry.name
153 except OSError:
154 pass
155 finally:
156 if fd is not None:
157 os.close(fd)
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300158 except OSError:
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300159 return
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300160
161# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly):
    """Yield relative paths of all non-hidden entries below dirname.

    Each entry is yielded before the paths found beneath it.
    """
    # The listing is fully collected before descending into any entry.
    for entry in list(_iterdir(dirname, dir_fd, dironly)):
        if _ishidden(entry):
            continue
        yield entry
        subdir = _join(dirname, entry) if dirname else entry
        for nested in _rlistdir(subdir, dir_fd, dironly):
            yield _join(entry, nested)
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300170
Guido van Rossumc2ef5c21992-01-12 23:32:11 +0000171
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +0300172def _lexists(pathname, dir_fd):
173 # Same as os.path.lexists(), but with dir_fd
174 if dir_fd is None:
175 return os.path.lexists(pathname)
176 try:
177 os.lstat(pathname, dir_fd=dir_fd)
178 except (OSError, ValueError):
179 return False
180 else:
181 return True
182
183def _isdir(pathname, dir_fd):
184 # Same as os.path.isdir(), but with dir_fd
185 if dir_fd is None:
186 return os.path.isdir(pathname)
187 try:
188 st = os.stat(pathname, dir_fd=dir_fd)
189 except (OSError, ValueError):
190 return False
191 else:
192 return stat.S_ISDIR(st.st_mode)
193
194def _join(dirname, basename):
195 # It is common if dirname or basename is empty
196 if not dirname or not basename:
197 return dirname or basename
198 return os.path.join(dirname, basename)
199
# Patterns that find any one of the wildcard characters '*', '?' and '['.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')

def has_magic(s):
    """Return True if s (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
Hynek Schlawacke26568f2012-12-27 10:10:11 +0100209
210def _ishidden(path):
211 return path[0] in ('.', b'.'[0])
Serhiy Storchakafd32fff2013-11-18 13:06:43 +0200212
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300213def _isrecursive(pattern):
214 if isinstance(pattern, bytes):
215 return pattern == b'**'
216 else:
217 return pattern == '**'
218
def escape(pathname):
    """Escape all special characters.

    Each of '*', '?' and '[' is wrapped in square brackets.  The drive
    part returned by os.path.splitdrive() is left untouched.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped
Serhiy Storchaka8a64cea2020-06-18 22:08:27 +0300230
231
232_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)