Guido van Rossum | ab096c9 | 1997-04-02 05:47:11 +0000 | [diff] [blame] | 1 | """Filename globbing utility.""" |
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 2 | |
Georg Brandl | 71ff646 | 2007-03-07 08:31:51 +0000 | [diff] [blame] | 3 | import sys |
Guido van Rossum | bba77af | 1992-01-12 23:26:24 +0000 | [diff] [blame] | 4 | import os |
Guido van Rossum | 9694fca | 1997-10-22 21:00:49 +0000 | [diff] [blame] | 5 | import re |
Georg Brandl | 71ff646 | 2007-03-07 08:31:51 +0000 | [diff] [blame] | 6 | import fnmatch |
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 7 | |
try:
    _unicode = unicode
except NameError:
    # Interpreters that have no `unicode` builtin get an empty stand-in
    # type, so isinstance() checks against `_unicode` keep working (they
    # are simply never true for ordinary strings).
    _unicode = type('_unicode', (object,), {})
# Names exported by a star-import of this module.
__all__ = ["glob", "iglob"]
Guido van Rossum | bba77af | 1992-01-12 23:26:24 +0000 | [diff] [blame] | 17 | |
def glob(pathname):
    """Collect every path matching the pattern `pathname` into a list.

    Shell-style wildcards, as understood by fnmatch, may appear in the
    pattern.  The matching itself is delegated to iglob(); this function
    only gathers its results eagerly.

    """
    matches = iglob(pathname)
    return list(matches)
| 25 | |
def iglob(pathname):
    """Lazily yield each path that matches the pattern `pathname`.

    Shell-style wildcards, as understood by fnmatch, may appear in the
    pattern.

    """
    if not has_magic(pathname):
        # A literal path matches itself, provided it exists (lexists also
        # accepts broken symbolic links).
        if os.path.lexists(pathname):
            yield pathname
        return
    head, tail = os.path.split(pathname)
    if not head:
        # No directory component: match inside the current directory and
        # yield the bare names.
        for match in glob1(os.curdir, tail):
            yield match
        return
    # For a drive or UNC path `os.path.split()` hands back its argument
    # unchanged as the directory part.  Recursing on it would never
    # terminate when such a path contains magic characters
    # (i.e. r'\\?\C:'), so only recurse when the head really got shorter.
    if head != pathname and has_magic(head):
        parents = iglob(head)
    else:
        parents = [head]
    # A wildcard basename needs pattern matching; a literal one only needs
    # an existence check.
    matcher = glob1 if has_magic(tail) else glob0
    for parent in parents:
        for match in matcher(parent, tail):
            yield os.path.join(parent, match)
| 55 | |
# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
# takes a literal basename (so it only has to check for its existence).
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 59 | |
def glob1(dirname, pattern):
    """Return the entries of directory `dirname` whose names match `pattern`.

    An empty `dirname` stands for the current directory.  An empty list is
    returned when the directory cannot be listed.  Entries whose name
    starts with a dot are skipped unless `pattern` itself starts with a
    dot.
    """
    dirname = dirname or os.curdir
    if isinstance(pattern, _unicode) and not isinstance(dirname, unicode):
        # A unicode pattern needs a unicode directory name; decode it with
        # the filesystem encoding, or the default encoding if that is unset.
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        dirname = unicode(dirname, encoding)
    try:
        entries = os.listdir(dirname)
    except os.error:
        return []
    if pattern[0] != '.':
        entries = [entry for entry in entries if entry[0] != '.']
    return fnmatch.filter(entries, pattern)
Guido van Rossum | 65a9620 | 1991-01-01 18:17:49 +0000 | [diff] [blame] | 73 | |
def glob0(dirname, basename):
    """Return `[basename]` if the literal entry exists, otherwise `[]`.

    `os.path.split()` produces an empty basename for a path that ends with
    a directory separator; in that case ('q*x/' should match only
    directories) `dirname` itself must be a directory.  Otherwise
    `basename` must exist inside `dirname`; lexists() is used, so a broken
    symbolic link also counts.
    """
    if basename == '':
        found = os.path.isdir(dirname)
    else:
        found = os.path.lexists(os.path.join(dirname, basename))
    return [basename] if found else []
| 84 | |
Guido van Rossum | c2ef5c2 | 1992-01-12 23:32:11 +0000 | [diff] [blame] | 85 | |
# Matches any one of the glob wildcard characters: '*', '?' or '['.
magic_check = re.compile('[*?[]')

def has_magic(s):
    """Tell whether `s` contains at least one glob wildcard character."""
    found = magic_check.search(s)
    return found is not None