blob: 8b7b4ca297e125d10ba366da044fbe62c32bc010 [file] [log] [blame]
Derek Sollenberger2eb3b4d2016-01-11 14:41:40 -05001#!/usr/bin/env python
2# encoding: utf-8
3# Baptiste Lepilleur, 2009
4
from __future__ import print_function
try:
    from dircache import listdir  # Python 2 only; the module was removed in Python 3
except ImportError:
    from os import listdir  # keeps the 'listdir' name available on Python 3
import re
import fnmatch
import os.path
10
11
# Space-separated fnmatch patterns naming the directories (version-control
# metadata) that glob() does not descend into by default during its recursive
# traversal. The trailing space of the original literal is preserved.
prune_dirs = ' '.join( (
    '.git',
    '.bzr',
    '.hg',
    '.svn',
    '_MTN',
    '_darcs',
    'CVS',
    'SCCS',
) ) + ' '
15
16# These fnmatch expressions are used by default to exclude files and dirs
17# while doing the recursive traversal in the glob_impl method of glob function.
18##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
19
# Whitespace-separated ant_glob patterns that glob() uses by default to reject
# entries: editor backup/lock files and version-control metadata, at any depth.
# The string keeps the exact layout of the original literal: a leading newline,
# one pattern per line, and a trailing space.
default_excludes = '\n' + '\n'.join( (
    '**/*~',
    '**/#*#',
    '**/.#*',
    '**/%*%',
    '**/._*',
    '**/CVS',
    '**/CVS/**',
    '**/.cvsignore',
    '**/SCCS',
    '**/SCCS/**',
    '**/vssver.scc',
    '**/.svn',
    '**/.svn/**',
    '**/.git',
    '**/.git/**',
    '**/.gitignore',
    '**/.bzr',
    '**/.bzr/**',
    '**/.hg',
    '**/.hg/**',
    '**/_MTN',
    '**/_MTN/**',
    '**/_darcs',
    '**/_darcs/**',
    '**/.DS_Store',
) ) + ' '
48
# Entry-type bit flags for glob()'s entry_type argument; combine them with '|'.
DIR = 1 << 0        # directory that is not a symbolic link
FILE = 1 << 1       # regular file that is not a symbolic link
DIR_LINK = 1 << 2   # symbolic link that resolves to a directory
FILE_LINK = 1 << 3  # symbolic link that resolves to a file
# Convenience combinations of the flags above.
LINKS = FILE_LINK | DIR_LINK
ALL_NO_LINK = FILE | DIR
ALL = ALL_NO_LINK | LINKS
56
# Tokenizer for ant patterns. Alternatives are tried left to right, so the
# '**' forms are recognized before a lone '*' or '/':
#   group 1: '/**/'   group 2: '**/'   group 3: '/**'
#   group 4: '*'      group 5: '/'     group 6: run of literal characters
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )

def ant_pattern_to_re( ant_pattern ):
    """Generates a compiled regular expression from the ant pattern.

    Matching convention:
      **/a: match 'a', 'dir/a', 'dir1/dir2/a'
      a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
      *.py: match 'script.py' but not 'a/script.py'

    In the pattern '/' is the separator; in the matched path both '/' and
    os.path.sep are accepted as separators.

    The expression is anchored on the whole path: it ends with '\\Z' (a '$'
    would also accept a path followed by a trailing newline) and is compiled
    with re.DOTALL so that '**' can span directory names containing newlines.

    Raises ValueError if the tokenizer cannot consume the pattern contiguously.
    """
    escaped_sep = re.escape( os.path.sep )
    sep_rex = r'(?:/|%s)' % escaped_sep          # one path separator
    name_chars_rex = '[^/%s]*' % escaped_sep     # anything but a separator
    rex = ['^']
    next_pos = 0
    for match in _ANT_RE.finditer( ant_pattern ):
        # Tokens must be adjacent; a gap means some text was not recognized.
        if match.start(0) != next_pos:
            raise ValueError( "Invalid ant pattern" )
        if match.group(1):   # /**/ : a separator, then zero or more directories
            rex.append( sep_rex + '(?:.*%s)?' % sep_rex )
        elif match.group(2): # **/ : zero or more leading directories
            rex.append( '(?:.*%s)?' % sep_rex )
        elif match.group(3): # /** : a separator followed by anything
            rex.append( sep_rex + '.*' )
        elif match.group(4): # * : anything within a single path component
            rex.append( name_chars_rex )
        elif match.group(5): # /
            rex.append( sep_rex )
        else:                # literal text (empty for the final zero-width token)
            rex.append( re.escape(match.group(6)) )
        next_pos = match.end()
    rex.append( r'\Z' )
    return re.compile( ''.join( rex ), re.DOTALL )
90
91def _as_list( l ):
92 if isinstance(l, basestring):
93 return l.split()
94 return l
95
def glob(dir_path,
         includes = '**/*',
         excludes = default_excludes,
         entry_type = FILE,
         prune_dirs = prune_dirs,
         max_depth = 25):
    """Return the entries found under dir_path that match the ant patterns.

    dir_path: directory to scan; '/' is converted to os.path.sep.
    includes: ant patterns (whitespace-separated string or list); an entry
        must match at least one of them.
    excludes: ant patterns (same forms); an entry matching any is rejected.
    entry_type: bit mask of DIR, FILE, DIR_LINK, FILE_LINK selecting which
        kinds of entries are returned.
    prune_dirs: fnmatch patterns (same forms), tested against the bare
        directory name; matching directories are not descended into.
    max_depth: maximum number of directory levels to descend below dir_path
        (0 lists only dir_path itself); None removes the limit. The limit
        also bounds the traversal when directory symlinks form a cycle.

    The include/exclude patterns are matched against the path built by
    joining dir_path with the names below it, so they see the dir_path
    prefix as well. Returned paths have that same form. Each directory
    listing is visited in sorted order.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_patterns = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
    root_dir_path = dir_path.replace('/',os.path.sep)

    def is_pruned_dir( dir_name ):
        """Return True if dir_name matches one of the prune patterns."""
        return any( fnmatch.fnmatch( dir_name, pattern )
                    for pattern in prune_patterns )

    def apply_filter( full_path, filter_rexs ):
        """Return True if at least one of the filter regular expression match full_path."""
        return any( rex.match( full_path ) for rex in filter_rexs )

    found = []
    pending_dirs = [(root_dir_path, 0)]  # stack of (directory, depth below root)
    while pending_dirs:
        current_dir, depth = pending_dirs.pop()
        for entry in sorted( os.listdir( current_dir ) ):
            full_path = os.path.join( current_dir, entry )
            is_dir = os.path.isdir( full_path )
            # A directory is explored even when it is itself filtered out
            # below; only the prune patterns and max_depth stop the descent.
            if is_dir and not is_pruned_dir( entry ):
                if max_depth is None or depth < max_depth:
                    pending_dirs.append( (full_path, depth + 1) )
            if not apply_filter( full_path, include_filter ):
                continue
            if apply_filter( full_path, exclude_filter ):
                continue
            is_file = os.path.isfile( full_path )
            if not is_file and not is_dir:
                continue  # neither a file nor a directory (e.g. dangling link)
            if os.path.islink( full_path ):
                found_type = FILE_LINK if is_file else DIR_LINK
            else:
                found_type = FILE if is_file else DIR
            if (found_type & entry_type) != 0:
                found.append( full_path )
    return found
153
154
if __name__ == "__main__":
    import unittest

    def _local_paths( paths ):
        """Return paths with every '/' replaced by the platform separator."""
        return [ p.replace('/',os.path.sep) for p in paths ]

    class AntPatternToRETest(unittest.TestCase):
        """Checks ant_pattern_to_re against accepted and rejected sample paths."""

        def test_matching( self ):
            # Each case: ( ant pattern, paths that must match, paths that must not ).
            test_cases = [ ( 'path',
                             ['path'],
                             ['somepath', 'pathsuffix', '/path', 'path/'] ),
                           ( '*.py',
                             ['source.py', 'source.ext.py', '.py'],
                             ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
                           ( '**/path',
                             ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                             ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
                           ( 'path/**',
                             ['path/a', 'path/path/a', 'path//'],
                             ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
                           ( '/**/path',
                             ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                             ['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
                           ( 'a/b',
                             ['a/b'],
                             ['somea/b', 'a/bsuffix', 'a/b/c'] ),
                           ( '**/*.py',
                             ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                             ['script.pyc', 'script.pyo', 'a.py/b'] ),
                           ( 'src/**/*.py',
                             ['src/a.py', 'src/dir/a.py'],
                             ['a/src/a.py', '/src/a.py'] ),
                           ]
            # Run every case a second time with the platform's native separator.
            native_cases = [ (ant_pattern, _local_paths(accepted), _local_paths(rejected))
                             for ant_pattern, accepted, rejected in test_cases ]
            for ant_pattern, accepted_matches, rejected_matches in test_cases + native_cases:
                rex = ant_pattern_to_re( ant_pattern )
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for accepted_match in accepted_matches:
                    print('Accepted?:', accepted_match)
                    self.assertTrue( rex.match( accepted_match ) is not None,
                                     '%r should match %r' % (ant_pattern, accepted_match) )
                for rejected_match in rejected_matches:
                    print('Rejected?:', rejected_match)
                    self.assertTrue( rex.match( rejected_match ) is None,
                                     '%r should not match %r' % (ant_pattern, rejected_match) )

    unittest.main()
202 unittest.main()