Guido van Rossum | ab096c9 | 1997-04-02 05:47:11 +0000 | [diff] [blame] | 1 | """Filename globbing utility.""" |
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 2 | |
Guido van Rossum | bba77af | 1992-01-12 23:26:24 +0000 | [diff] [blame] | 3 | import os |
Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame] | 4 | import re |
Guido van Rossum | d8faa36 | 2007-04-27 19:54:29 +0000 | [diff] [blame] | 5 | import fnmatch |
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 6 | |
Serhiy Storchaka | 04b5700 | 2015-11-09 23:18:19 +0200 | [diff] [blame] | 7 | __all__ = ["glob", "iglob", "escape"] |
Guido van Rossum | bba77af | 1992-01-12 23:26:24 +0000 | [diff] [blame] | 8 | |
def glob(pathname, *, recursive=False):
    """Collect every path that matches *pathname* into a list.

    *pathname* may use the simple shell-style wildcards understood by
    fnmatch.  In contrast to fnmatch, names that begin with a dot are
    treated specially: the '*' and '?' wildcards never match them.

    When *recursive* is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    matches = iglob(pathname, recursive=recursive)
    return list(matches)
Johannes Gijsbers | 836f543 | 2005-01-08 13:13:19 +0000 | [diff] [blame] | 21 | |
def iglob(pathname, *, recursive=False):
    """Lazily produce the paths that match *pathname*.

    *pathname* may use the simple shell-style wildcards understood by
    fnmatch.  In contrast to fnmatch, names that begin with a dot are
    treated specially: the '*' and '?' wildcards never match them.

    When *recursive* is true, the pattern '**' matches any files and
    zero or more directories and subdirectories.
    """
    matches = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # For a bare '**' the underlying generator emits an empty string
        # first; consume it here so callers never see it.
        first = next(matches)
        assert not first
    return matches
| 38 | |
def _iglob(pathname, recursive):
    """Yield the paths matching *pathname*; generator behind `iglob`.

    The pattern is split into a directory part and a final component.  The
    directory part is expanded first (recursively, if it contains wildcards)
    and the final component is then matched inside each resulting directory.
    """
    dirname, basename = os.path.split(pathname)

    # No wildcards anywhere: the pattern is a literal path, just test it.
    if not has_magic(pathname):
        if basename:
            found = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            found = os.path.isdir(dirname)
        if found:
            yield pathname
        return

    # Choose the helper that matches the final component inside a directory.
    if not has_magic(basename):
        glob_in_dir = glob0
    elif recursive and _isrecursive(basename):
        glob_in_dir = glob2
    else:
        glob_in_dir = glob1

    # No directory part: the whole pattern is the (wildcard) final component,
    # so the helper's results are yielded without joining.
    if not dirname:
        yield from glob_in_dir(dirname, basename)
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        parents = _iglob(dirname, recursive)
    else:
        parents = [dirname]

    for parent in parents:
        for name in glob_in_dir(parent, basename):
            yield os.path.join(parent, name)
Johannes Gijsbers | 836f543 | 2005-01-08 13:13:19 +0000 | [diff] [blame] | 73 | |
| 74 | # These 2 helper functions non-recursively glob inside a literal directory. |
| 75 | # They return a list of basenames. `glob1` accepts a pattern while `glob0` |
| 76 | # takes a literal basename (so it only has to check for its existence). |
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 77 | |
def glob1(dirname, pattern):
    """Return the basenames in *dirname* that match the wildcard *pattern*.

    An empty *dirname* means the current directory.  Entries starting with a
    dot are only considered when *pattern* itself starts with a dot.  If the
    directory cannot be listed, an empty list is returned.
    """
    if not dirname:
        # Use a current-directory name of the same type as the pattern.
        dirname = (bytes(os.curdir, 'ASCII')
                   if isinstance(pattern, bytes) else os.curdir)
    try:
        entries = os.listdir(dirname)
    except OSError:
        return []
    if not _ishidden(pattern):
        entries = [entry for entry in entries if not _ishidden(entry)]
    return fnmatch.filter(entries, pattern)
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 91 | |
def glob0(dirname, basename):
    """Return ``[basename]`` if the literal name exists in *dirname*, else ``[]``.

    `os.path.split()` returns an empty basename for paths ending with a
    directory separator; in that case the match succeeds only when *dirname*
    is a directory ('q*x/' should match only directories).
    """
    if basename:
        exists = os.path.lexists(os.path.join(dirname, basename))
    else:
        exists = os.path.isdir(dirname)
    return [basename] if exists else []
| 102 | |
Serhiy Storchaka | c2edcdd | 2014-09-11 12:17:37 +0300 | [diff] [blame] | 103 | # This helper function recursively yields relative pathnames inside a literal |
| 104 | # directory. |
| 105 | |
def glob2(dirname, pattern):
    """Yield an empty name, then every relative path found below *dirname*.

    *pattern* must be the recursive wildcard ('**' or b'**'); it is only
    used to produce an empty string of the matching type.
    """
    assert _isrecursive(pattern)
    empty_name = pattern[:0]  # '' or b'', same type as the pattern
    yield empty_name
    yield from _rlistdir(dirname)
| 110 | |
| 111 | # Recursively yields relative pathnames inside a literal directory. |
def _rlistdir(dirname):
    """Recursively yield relative pathnames found inside *dirname*.

    An empty *dirname* means the current directory.  Each non-hidden entry is
    yielded, followed by the entries found beneath it, with names relative
    to *dirname*.  Entries starting with a dot are skipped and not descended
    into.  Anything that cannot be listed as a directory yields nothing.
    """
    if not dirname:
        # Substitute the current directory, keeping the str/bytes type.
        if isinstance(dirname, bytes):
            dirname = bytes(os.curdir, 'ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        # The listing failed (e.g. the path is not a directory): no entries.
        return
    for name in names:
        if _ishidden(name):
            continue
        yield name
        # `dirname` is never empty at this point, so it can always be joined.
        for sub in _rlistdir(os.path.join(dirname, name)):
            yield os.path.join(name, sub)
| 128 | |
Guido van Rossum | c2ef5c2 | 1992-01-12 23:32:11 +0000 | [diff] [blame] | 129 | |
# Compiled patterns that capture a single glob metacharacter ('*', '?' or '['),
# in a str flavour and a bytes flavour.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')
Tim Golden | 9b3fb0c | 2012-11-06 15:33:30 +0000 | [diff] [blame] | 132 | |
def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None
Hynek Schlawack | e26568f | 2012-12-27 10:10:11 +0100 | [diff] [blame] | 139 | |
| 140 | def _ishidden(path): |
| 141 | return path[0] in ('.', b'.'[0]) |
Serhiy Storchaka | fd32fff | 2013-11-18 13:06:43 +0200 | [diff] [blame] | 142 | |
Serhiy Storchaka | c2edcdd | 2014-09-11 12:17:37 +0300 | [diff] [blame] | 143 | def _isrecursive(pattern): |
| 144 | if isinstance(pattern, bytes): |
| 145 | return pattern == b'**' |
| 146 | else: |
| 147 | return pattern == '**' |
| 148 | |
def escape(pathname):
    """Return *pathname* with every glob metacharacter escaped.

    Each of '*', '?' and '[' is wrapped in square brackets.  The drive part
    of the path is left as is: metacharacters do not work there and
    shouldn't be escaped.
    """
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, bytes):
        escaped = magic_check_bytes.sub(br'[\1]', rest)
    else:
        escaped = magic_check.sub(r'[\1]', rest)
    return drive + escaped