blob: 16330d816a4e8fe1229f5f6677f848bcbea359cf [file] [log] [blame]
Guido van Rossumab096c91997-04-02 05:47:11 +00001"""Filename globbing utility."""
Guido van Rossum65a96201991-01-01 18:17:49 +00002
Guido van Rossumbba77af1992-01-12 23:26:24 +00003import os
Guido van Rossum9694fca1997-10-22 21:00:49 +00004import re
Guido van Rossumd8faa362007-04-27 19:54:29 +00005import fnmatch
Guido van Rossum65a96201991-01-01 18:17:49 +00006
Serhiy Storchaka04b57002015-11-09 23:18:19 +02007__all__ = ["glob", "iglob", "escape"]
Guido van Rossumbba77af1992-01-12 23:26:24 +00008
def glob(pathname, *, recursive=False):
    """Collect every path matching the pattern *pathname* into a list.

    Shell-style wildcards in the manner of fnmatch are understood, with
    one difference from fnmatch: a filename beginning with a dot is a
    special case and is not matched by the '*' and '?' patterns.

    When *recursive* is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.
    """
    matches = iglob(pathname, recursive=recursive)
    return list(matches)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000021
def iglob(pathname, *, recursive=False):
    """Return an iterator over the paths matching the pattern *pathname*.

    Shell-style wildcards in the manner of fnmatch are understood, with
    one difference from fnmatch: a filename beginning with a dot is a
    special case and is not matched by the '*' and '?' patterns.

    When *recursive* is true, the pattern '**' matches any files and zero
    or more directories and subdirectories.
    """
    results = _iglob(pathname, recursive)
    if not (recursive and _isrecursive(pathname)):
        return results
    # A bare '**' pattern makes the underlying generator start with an
    # empty string; consume it so callers never see it.
    first = next(results)
    assert not first
    return results
38
def _iglob(pathname, recursive):
    """Yield the paths matching *pathname*; generator behind iglob().

    *pathname* is split into a directory part and a final component.  The
    directory part is expanded first (recursively, when it contains
    wildcards), then the final component is looked up inside each
    resulting directory with glob0, glob1 or glob2.
    """
    head, tail = os.path.split(pathname)

    # No wildcards anywhere: a plain existence check is all that is needed.
    if not has_magic(pathname):
        if tail:
            found = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            found = os.path.isdir(head)
        if found:
            yield pathname
        return

    # Wildcards but no directory part: search relative to the empty dirname.
    if not head:
        searcher = glob2 if (recursive and _isrecursive(tail)) else glob1
        yield from searcher(head, tail)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC
    # path contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, recursive)
    else:
        parents = [head]

    # Choose how the final component is matched inside each parent.
    if not has_magic(tail):
        searcher = glob0
    elif recursive and _isrecursive(tail):
        searcher = glob2
    else:
        searcher = glob1

    for parent in parents:
        for name in searcher(parent, tail):
            yield os.path.join(parent, name)
Johannes Gijsbers836f5432005-01-08 13:13:19 +000073
74# These 2 helper functions non-recursively glob inside a literal directory.
75# They return a list of basenames. `glob1` accepts a pattern while `glob0`
76# takes a literal basename (so it only has to check for its existence).
Guido van Rossum65a96201991-01-01 18:17:49 +000077
def glob1(dirname, pattern):
    """Return the basenames in *dirname* that match *pattern*.

    An empty *dirname* stands for the current directory (as bytes when
    *pattern* is bytes).  A directory that cannot be listed gives an empty
    list.  Names starting with a dot are only considered when *pattern*
    itself starts with a dot.
    """
    if not dirname:
        dirname = bytes(os.curdir, 'ASCII') if isinstance(pattern, bytes) else os.curdir
    try:
        entries = os.listdir(dirname)
    except OSError:
        return []
    if _ishidden(pattern):
        candidates = entries
    else:
        candidates = [entry for entry in entries if not _ishidden(entry)]
    return fnmatch.filter(candidates, pattern)
Guido van Rossum65a96201991-01-01 18:17:49 +000091
def glob0(dirname, basename):
    """Return ``[basename]`` if the literal name exists in *dirname*, else ``[]``.

    `os.path.split()` returns an empty basename for paths ending with a
    directory separator, so an empty *basename* succeeds only when
    *dirname* is a directory ('q*x/' should match only directories).
    """
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        found = os.path.isdir(dirname)
    return [basename] if found else []
102
Serhiy Storchakac2edcdd2014-09-11 12:17:37 +0300103# This helper function recursively yields relative pathnames inside a literal
104# directory.
105
def glob2(dirname, pattern):
    """Yield an empty name, then every relative path below *dirname*.

    *pattern* must be the recursive wildcard.  The leading empty value has
    the same type as *pattern* (str or bytes); the rest comes from
    _rlistdir().
    """
    assert _isrecursive(pattern)
    empty = pattern[:0]
    yield empty
    for relative in _rlistdir(dirname):
        yield relative
110
def _rlistdir(dirname):
    """Recursively yield relative pathnames found beneath *dirname*.

    An empty *dirname* means the current directory (as bytes when
    *dirname* is bytes).  Entries whose name starts with a dot are skipped
    and not descended into.  Every other entry is yielded and then listed
    in turn; an entry that cannot be listed (for example a regular file)
    simply contributes nothing further.
    """
    if not dirname:
        if isinstance(dirname, bytes):
            dirname = bytes(os.curdir, 'ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        # Same exception class glob1() catches; `os.error` is only a
        # deprecated alias of it.
        return
    for name in names:
        if _ishidden(name):
            continue
        yield name
        # `dirname` is never empty here (it was defaulted above), so the
        # child path can always be built with a plain join.
        for sub in _rlistdir(os.path.join(dirname, name)):
            yield os.path.join(name, sub)
128
Guido van Rossumc2ef5c21992-01-12 23:32:11 +0000129
# Matches (and captures) a single glob metacharacter: '*', '?' or '['.
# One compiled pattern for str input and one for bytes input.
magic_check = re.compile(r'([*?[])')
magic_check_bytes = re.compile(br'([*?[])')
Tim Golden9b3fb0c2012-11-06 15:33:30 +0000132
def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
Hynek Schlawacke26568f2012-12-27 10:10:11 +0100139
def _ishidden(path):
    """Return True if *path* (str or bytes) starts with a dot.

    An empty *path* is reported as not hidden instead of raising
    IndexError.
    """
    if not path:
        return False
    # Indexing bytes gives an int, so compare against both the str dot and
    # the integer value of the bytes dot.
    return path[0] in ('.', b'.'[0])
Serhiy Storchakafd32fff2013-11-18 13:06:43 +0200142
def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard '**'.

    The comparison is made against a literal of the same kind (str or
    bytes) as *pattern*.
    """
    wildcard = b'**' if isinstance(pattern, bytes) else '**'
    return pattern == wildcard
148
def escape(pathname):
    """Escape all special characters in *pathname*.

    Each of '*', '?' and '[' is wrapped in square brackets so that it
    matches itself literally.  The drive part, as split off by
    os.path.splitdrive(), is returned untouched because metacharacters do
    not work there and shouldn't be escaped.
    """
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped