blob: 0b3fcc6bbb9af3b63f8c2a5577f6301ea9c65b60 [file] [log] [blame]
Guido van Rossumab096c91997-04-02 05:47:11 +00001"""Filename globbing utility."""
Guido van Rossum65a96201991-01-01 18:17:49 +00002
Guido van Rossumbba77af1992-01-12 23:26:24 +00003import os
Guido van Rossum9694fca1997-10-22 21:00:49 +00004import re
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import fnmatch
Steve Dower60419a72019-06-24 08:42:54 -07006import sys
Guido van Rossum65a96201991-01-01 18:17:49 +00007
Serhiy Storchaka04b57002015-11-09 23:18:19 +02008__all__ = ["glob", "iglob", "escape"]
Guido van Rossumbba77af1992-01-12 23:26:24 +00009
def glob(pathname, *, recursive=False):
    """Collect every path matching a pathname pattern into a list.

    Shell-style wildcards in the manner of fnmatch are supported.  In
    contrast to fnmatch, a filename that begins with a dot is a special
    case: the '*' and '?' patterns do not match it.

    When recursive is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.
    """
    matches = iglob(pathname, recursive=recursive)
    return list(matches)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000022
def iglob(pathname, *, recursive=False):
    """Return an iterator over the paths that match a pathname pattern.

    Shell-style wildcards in the manner of fnmatch are supported.  In
    contrast to fnmatch, a filename that begins with a dot is a special
    case: the '*' and '?' patterns do not match it.

    When recursive is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.
    """
    matches = _iglob(pathname, recursive, False)
    if not recursive or not _isrecursive(pathname):
        return matches
    # For a bare '**' pattern the first item produced is an empty string;
    # consume it here so callers never see it.
    leading = next(matches)
    assert not leading
    return matches
39
def _iglob(pathname, recursive, dironly):
    """Yield each path matching *pathname*; worker generator behind iglob().

    The pattern is split into a directory part and a final component.  If
    the directory part contains wildcards it is expanded first by a nested
    call made with dironly=True, and the final component is then matched
    inside every directory that call produces.

    recursive -- when true, a final component equal to '**' is expanded
                 with _glob2.
    dironly   -- forwarded to the per-directory helpers.

    The "glob.glob" audit event is raised each time this generator starts
    running, nested calls included.
    """
    sys.audit("glob.glob", pathname, recursive)
    head, tail = os.path.split(pathname)

    # No wildcard anywhere: the pattern can name at most one path.
    if not has_magic(pathname):
        assert not dironly
        if tail:
            found = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            found = os.path.isdir(head)
        if found:
            yield pathname
        return

    # Single-component pattern: the helpers' results are yielded unchanged.
    if not head:
        if recursive and _isrecursive(tail):
            lister = _glob2
        else:
            lister = _glob1
        yield from lister(head, tail, dironly)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, recursive, True)
    else:
        parents = [head]

    # Choose how the final component is matched inside each parent.
    if not has_magic(tail):
        lister = _glob0
    elif recursive and _isrecursive(tail):
        lister = _glob2
    else:
        lister = _glob1

    for parent in parents:
        for name in lister(parent, tail, dironly):
            yield os.path.join(parent, name)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000076
77# These 2 helper functions non-recursively glob inside a literal directory.
Serhiy Storchaka28ab6342016-09-06 22:33:41 +030078# They return a list of basenames. _glob1 accepts a pattern while _glob0
Johannes Gijsbers836f5432005-01-08 13:13:19 +000079# takes a literal basename (so it only has to check for its existence).
Guido van Rossum65a96201991-01-01 18:17:49 +000080
def _glob1(dirname, pattern, dironly):
    """Return the names listed in *dirname* that match the wildcard *pattern*.

    Names starting with a dot are discarded before matching unless the
    pattern itself starts with a dot.  *dironly* is passed on to _iterdir.
    """
    entries = list(_iterdir(dirname, dironly))
    if _ishidden(pattern):
        candidates = entries
    else:
        candidates = [entry for entry in entries if not _ishidden(entry)]
    return fnmatch.filter(candidates, pattern)
Guido van Rossum65a96201991-01-01 18:17:49 +000086
def _glob0(dirname, basename, dironly):
    """Return [basename] if the literal *basename* exists in *dirname*, else [].

    *dironly* is accepted so this helper can be called like the other
    per-directory helpers; it is not used here.
    """
    if basename:
        exists = os.path.lexists(os.path.join(dirname, basename))
    else:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        exists = os.path.isdir(dirname)
    return [basename] if exists else []
97
Serhiy Storchaka28ab6342016-09-06 22:33:41 +030098# Following functions are not public but can be used by third-party code.
99
def glob0(dirname, pattern):
    """Call _glob0 for *dirname* and *pattern* with dironly set to False."""
    return _glob0(dirname, pattern, dironly=False)
102
def glob1(dirname, pattern):
    """Call _glob1 for *dirname* and *pattern* with dironly set to False."""
    return _glob1(dirname, pattern, dironly=False)
105
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300106# This helper function recursively yields relative pathnames inside a literal
107# directory.
108
def _glob2(dirname, pattern, dironly):
    """Yield an empty name, then every relative path found below *dirname*.

    *pattern* must be the recursive wildcard; it is only used to check that
    and to produce an empty value of the same type (str or bytes).
    """
    assert _isrecursive(pattern)
    empty = pattern[:0]
    yield empty
    yield from _rlistdir(dirname, dironly)
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300113
# With dironly false, every entry name in the directory is yielded.
# With dironly true, only the names of directories are yielded.
def _iterdir(dirname, dironly):
    """Yield the entry names of *dirname* as reported by os.scandir().

    An empty *dirname* is replaced by os.curdir (as bytes when *dirname* is
    bytes).  OSError raised while opening or scanning the directory ends
    the iteration quietly; OSError raised while handling one entry skips
    that entry.
    """
    if not dirname:
        current = os.curdir
        dirname = bytes(current, 'ASCII') if isinstance(dirname, bytes) else current
    try:
        with os.scandir(dirname) as scanner:
            for item in scanner:
                try:
                    if dironly and not item.is_dir():
                        continue
                    yield item.name
                except OSError:
                    # Best effort: ignore this entry and carry on.
                    pass
    except OSError:
        return
Serhiy Storchaka28ab6342016-09-06 22:33:41 +0300132
# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dironly):
    """Yield each non-hidden name in *dirname*, then the paths beneath it.

    Names starting with a dot are skipped and not descended into.  Paths
    are relative to *dirname*; *dironly* is passed on to _iterdir.
    """
    # The listing is collected into a list before any recursion happens.
    for name in list(_iterdir(dirname, dironly)):
        if _ishidden(name):
            continue
        yield name
        if dirname:
            child = os.path.join(dirname, name)
        else:
            child = name
        for nested in _rlistdir(child, dironly):
            yield os.path.join(name, nested)
142
Guido van Rossumc2ef5c21992-01-12 23:32:11 +0000143
# Compiled patterns that find one glob metacharacter ('*', '?' or '[') and
# capture it as group 1: the first is for str input, the second for bytes.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')
Tim Golden9b3fb0c2012-11-06 15:33:30 +0000146
def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
Hynek Schlawacke26568f2012-12-27 10:10:11 +0100153
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is not hidden and gives False instead of raising
    IndexError.
    """
    # Indexing a str gives a 1-character str and indexing bytes gives an
    # int, so both forms of '.' are listed; b'.'[0] is the int 46, which
    # means no str is ever compared with a bytes object.
    return bool(path) and path[0] in ('.', b'.'[0])
Serhiy Storchakafd32fff2013-11-18 13:06:43 +0200156
def _isrecursive(pattern):
    """Return True if *pattern* is exactly '**' (or b'**' for bytes)."""
    wildcard = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == wildcard
162
def escape(pathname):
    """Return *pathname* with every '*', '?' and '[' wrapped in brackets.

    The drive part, as split off by os.path.splitdrive(), is left untouched.
    """
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, rest = os.path.splitdrive(pathname)
    # Escaping is done by wrapping any of "*?[" between square brackets.
    if isinstance(rest, bytes):
        finder, wrapped = magic_check_bytes, br'[\1]'
    else:
        finder, wrapped = magic_check, r'[\1]'
    return drive + finder.sub(wrapped, rest)