Derek Sollenberger | 2eb3b4d | 2016-01-11 14:41:40 -0500 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # encoding: utf-8 |
| 3 | # Baptiste Lepilleur, 2009 |
| 4 | |
from __future__ import print_function

import fnmatch
import os
import os.path
import re

try:
    from dircache import listdir  # Python 2 only; the module was removed in Python 3
except ImportError:
    from os import listdir
| 10 | |
| 11 | |
# Whitespace-separated fnmatch patterns. By default, glob() matches them against
# each directory name and does not descend into the directories that match.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '
| 15 | |
# Disabled fnmatch-based list of files and dirs to exclude during the recursive
# traversal. It is kept for reference only: the line is commented out and refers
# to `prune_pats`, which is not defined in this module.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
| 19 | |
# Whitespace-separated ant-style patterns (see ant_pattern_to_re). By default,
# glob() drops every entry whose path matches one of them from its results.
# Note that this filter only rejects results: a matching directory is still
# traversed unless its name also matches one of the prune_dirs patterns.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''
| 48 | |
# Entry-type bit flags. glob() keeps an entry when its flag has a bit in common
# with the requested `entry_type` mask, so the flags can be OR-ed together.
DIR = 1 << 0
FILE = 1 << 1
DIR_LINK = 1 << 2
FILE_LINK = 1 << 3

# Convenience masks built from the single-bit flags above.
LINKS = FILE_LINK | DIR_LINK
ALL_NO_LINK = FILE | DIR
ALL = ALL_NO_LINK | LINKS
| 56 | |
# Tokenizer for ant patterns. Each alternative is its own group, tried in order:
# 1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'   6: a run of literal text
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')


def ant_pattern_to_re(ant_pattern):
    """Compile an ant-style path pattern into a regular expression object.

    The returned expression is anchored at both ends. Matching convention:
        **/a:   matches 'a', 'dir/a', 'dir1/dir2/a'
        a/**/b: matches 'a/b', 'a/c/b', 'a/d/c/b'
        *.py:   matches 'script.py' but not 'a/script.py'

    Both '/' and the platform separator (os.path.sep) are accepted as path
    separators in the matched string.

    Raises ValueError if the tokens found in `ant_pattern` are not contiguous.
    """
    escaped_sep = re.escape(os.path.sep)
    any_sep = r'(?:/|%s)' % escaped_sep

    # Regex fragment emitted for each wildcard/separator token, keyed by the
    # number of the _ANT_RE group that recognised it.
    fragments = {
        1: any_sep + '(?:.*%s)?' % any_sep,  # /**/ : separator, then any dirs
        2: '(?:.*%s)?' % any_sep,            # **/  : optional leading dirs
        3: any_sep + '.*',                   # /**  : separator, then anything
        4: '[^/%s]*' % escaped_sep,          # *    : anything but a separator
        5: any_sep,                          # /    : a single separator
    }

    pieces = ['^']
    expected_start = 0
    for token in _ANT_RE.finditer(ant_pattern):
        # Every token must begin exactly where the previous one ended.
        if token.start(0) != expected_start:
            raise ValueError("Invalid ant pattern")
        kind = token.lastindex
        if kind in fragments:
            pieces.append(fragments[kind])
        else:
            # Literal text (possibly the empty match at the end of the pattern).
            pieces.append(re.escape(token.group(6)))
        expected_start = token.end()
    pieces.append('$')
    return re.compile(''.join(pieces))
| 90 | |
def _as_list(l):
    """Return `l` split on whitespace if it is a string, otherwise `l` itself.

    This lets callers pass pattern collections either as one
    whitespace-separated string or as an already-built sequence.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3: basestring no longer exists
    if isinstance(l, string_types):
        return l.split()
    return l
| 95 | |
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Return the entries found under `dir_path` that pass the given filters.

    Parameters:
        dir_path:   directory to scan; '/' is converted to os.path.sep.
        includes:   ant-style patterns (a whitespace-separated string or a
                    sequence); an entry must match at least one of them.
        excludes:   ant-style patterns; an entry matching any of them is
                    dropped from the results.
        entry_type: bit mask of DIR, FILE, DIR_LINK and FILE_LINK selecting
                    which kinds of entries are returned.
        prune_dirs: fnmatch patterns (string or sequence) tested against each
                    directory name; matching directories are not descended into.
        max_depth:  maximum number of directory levels visited, the entries
                    directly inside `dir_path` being level 1. None disables the
                    limit. The limit also bounds the walk when directory links
                    form a cycle, since linked directories are followed.

    Include and exclude patterns are matched against the entry path joined with
    `dir_path`, not against a path relative to `dir_path`.

    Returns a list of paths. Errors raised while listing a directory propagate
    to the caller.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_patterns = [p.replace('/', os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/', os.path.sep)

    def is_pruned_dir(dir_name):
        """Return True if `dir_name` matches one of the prune patterns."""
        return any(fnmatch.fnmatch(dir_name, pattern) for pattern in prune_patterns)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one regular expression matches `full_path`."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(root_dir_path):
        # Stack of (directory, level of the entries it contains).
        pending = [(root_dir_path, 1)]
        while pending:
            current_dir, depth = pending.pop()
            # Sorted so that results are deterministic (os.listdir order is not).
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                # Recursion is decided before filtering: a directory that is
                # filtered out of the results is still explored unless pruned.
                if is_dir and not is_pruned_dir(entry):
                    if max_depth is None or depth < max_depth:
                        pending.append((full_path, depth + 1))
                if not apply_filter(full_path, include_filter):
                    continue
                if apply_filter(full_path, exclude_filter):
                    continue
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    # Neither a file nor a directory (e.g. a broken link).
                    continue
                if os.path.islink(full_path):
                    kind = FILE_LINK if is_file else DIR_LINK
                else:
                    kind = FILE if is_file else DIR
                if kind & entry_type:
                    yield full_path

    return list(glob_impl(dir_path))
| 153 | |
| 154 | |
if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Self-test for ant_pattern_to_re, run when the module is executed."""

        ## def test_conversion( self ):
        ##     self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )

        def test_matching(self):
            """Check accepted and rejected paths for a set of ant patterns.

            Each case is checked twice: once with '/' separators and once with
            the separators replaced by os.path.sep.
            """
            def to_native(paths):
                return [p.replace('/', os.path.sep) for p in paths]

            # (ant pattern, paths that must match, paths that must not match)
            portable_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path', '/path']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]
            native_cases = [
                (pattern, to_native(accepted), to_native(rejected))
                for pattern, accepted, rejected in portable_cases
            ]

            for pattern, accepted, rejected in portable_cases + native_cases:
                rex = ant_pattern_to_re(pattern)
                print('ant_pattern:', pattern, ' => ', rex.pattern)
                for candidate in accepted:
                    print('Accepted?:', candidate)
                    self.assertTrue(rex.match(candidate) is not None)
                for candidate in rejected:
                    print('Rejected?:', candidate)
                    self.assertTrue(rex.match(candidate) is None)

    unittest.main()