blob: 211b65f8d2f4e6cc959c8705c7bdb90e0b2c2bec [file] [log] [blame]
Greg Wardadc11722000-07-30 00:04:17 +00001"""distutils.filelist
2
3Provides the FileList class, used for poking about the filesystem
4and building lists of files.
5"""
6
7# created 2000/07/17, Rene Liebscher (as template.py)
8# most parts taken from commands/sdist.py
9# renamed 2000/07/29 (to filelist.py) and officially added to
10# the Distutils source, Greg Ward
11
12__revision__ = "$Id$"
13
14import sys, os, string, re
15import fnmatch
16from types import *
17from glob import glob
18from distutils.util import convert_path
Greg Ward7b3d56c2000-07-30 00:21:36 +000019from distutils.errors import DistutilsTemplateError, DistutilsInternalError
Greg Wardadc11722000-07-30 00:04:17 +000020
class FileList:

    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      warn
        callable taking a single message string; called when a template
        pattern matches nothing
      debug_print
        callable taking a single message string; called with a trace of
        what each pattern does
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until 'set_allfiles()' or 'findall()'
        has been called
    """

    def __init__(self,
                 warn=None,
                 debug_print=None):
        # use standard warning and debug functions if no other given
        self.warn = warn or self.__warn
        self.debug_print = debug_print or self.__debug_print

        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Fill 'self.allfiles' by scanning 'dir' with the module-level
        'findall()' function.
        """
        self.allfiles = findall(dir)

    # -- Fallback warning/debug functions ------------------------------

    def __warn(self, msg):
        """Write 'msg' to stderr, prefixed with "warning: "."""
        sys.stderr.write("warning: %s\n" % msg)

    def __debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.core import DEBUG
        if DEBUG:
            # Parenthesised single argument: same output whether 'print'
            # is a statement or a function.
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' in place by (directory, basename)."""
        # Not a strict lexical sort!  Names are compared as the tuples
        # returned by os.path.split(), then glued back together.
        sortable_files = sorted([os.path.split(name) for name in self.files])
        self.files = [os.path.join(*parts) for parts in sortable_files]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicate entries from 'self.files' in place."""
        # Assumes list has been sorted!  Walk backwards so that deleting
        # an entry never shifts the indices still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into its components.

        Return a tuple (action, patterns, dir, dir_pattern); the entries
        that do not apply to 'action' are None.  Raise
        DistutilsTemplateError if the action is unknown or the line has
        the wrong number of words for its action.
        """
        words = line.split()
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                    "'%s' expects <pattern1> <pattern2> ..." % action)

            # A real list (not a lazy map object): the caller walks it
            # more than once.
            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                    "'%s' expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                    "'%s' expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Apply one template line to 'self.files'.

        The line is parsed with '_parse_template_line()' (which raises
        DistutilsTemplateError for malformed lines) and the matching
        include/exclude operation is performed.  'self.warn' is called
        for every pattern that selects or removes nothing.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    self.warn("no files found matching '%s'" % pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    self.warn(
                        "no previously-included files found matching '%s'" %
                        pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    self.warn(("no files found matching '%s' " +
                               "anywhere in distribution") %
                              pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    self.warn(("no previously-included files matching '%s' " +
                               "found anywhere in distribution") %
                              pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    self.warn(("no files found matching '%s' " +
                               "under directory '%s'") %
                              (pattern, dir))

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    self.warn(("no previously-included files matching '%s' " +
                               "found under directory '%s'") %
                              (pattern, dir))

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                self.warn("no directories found matching '%s'" % dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                self.warn(("no previously-included directories found " +
                           "matching '%s'") %
                          dir_pattern)
        else:
            raise DistutilsInternalError(
                "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?' match non-special characters, where "special" is platform-
        dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.  If 'self.allfiles'
        has not been set yet, it is filled in by calling 'self.findall()'.

        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = 1

        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so deletions do not disturb the indices
        # that are still to be examined.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = 1

        return files_found

# class FileList
283
284
285# ----------------------------------------------------------------------
286# Utility functions
287
def findall(dir=os.curdir):
    """Find all regular files under 'dir' and return the list of their
    names.

    Each name is prefixed with the directory it was found in, starting
    from 'dir' itself -- except that when 'dir' is os.curdir the leading
    "./" is left off.  Directories are descended into; entries that
    cannot be stat'ed (for example dangling symbolic links) are skipped
    rather than aborting the whole scan.
    """
    from stat import S_ISREG, S_ISDIR

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname).st_mode
            except OSError:
                # os.stat() follows symlinks, so a link whose target is
                # missing raises here; such an entry is not a file we
                # could ship, so ignore it.
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                # No S_ISLNK test needed: os.stat() reports the mode of
                # the link target, never of the link itself.
                pending.append(fullname)

    return found
318
319
def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match the path separator (os.sep).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the path separator.
    sep = os.sep
    if os.sep == '\\':
        # The separator passes through two layers of escaping: once as
        # part of the re.sub() replacement template, and once more inside
        # the character class of the regex being produced.
        sep = r'\\\\'
    escaped = r'\1[^%s]' % sep

    # A dot is "non-escaped" when it is preceded by an even number of
    # backslashes (possibly zero).  The lookbehind consumes no text, so
    # adjacent dots -- as produced by "??" -- are each replaced.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
    return pattern_re

# glob_to_re ()
339
340
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.

    If 'is_regex' is true, then 'pattern' is directly compiled to a regex
    (if it's a string) or just returned as-is (assumes it's a regex
    object); 'anchor' and 'prefix' are ignored.

    Otherwise 'pattern' (which may be None or empty) is translated with
    'glob_to_re()'.  If 'prefix' is given, the result matches names that
    start with 'prefix' (itself a glob), followed by a path separator,
    anything at all, and finally 'pattern'.  Without a prefix, a true
    'anchor' ties the match to the start of the name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # fnmatch.translate() surrounds the translated glob with
    # version-dependent text (a trailing "$" or "\Z", possibly a leading
    # "(?s:" group).  Translate a one-character glob to find out what the
    # wrapper looks like instead of assuming a single trailing character.
    wrap_start, _, wrap_end = glob_to_re('X').partition('X')

    def strip_wrapper(regex):
        # Return 'regex' without the fnmatch.translate() wrapper.
        return regex[len(wrap_start):len(regex) - len(wrap_end)]

    if pattern:
        body = strip_wrapper(glob_to_re(pattern))
    else:
        body = None

    if prefix is not None:
        # The separator is inserted into a regex, so a backslash
        # separator has to be escaped.
        sep = os.sep
        if sep == '\\':
            sep = r'\\'
        prefix_body = strip_wrapper(glob_to_re(prefix))
        # Like os.path.join(): no separator after an empty prefix or
        # after one that already ends with a separator.
        if prefix_body and not prefix_body.endswith(sep):
            prefix_body = prefix_body + sep
        pattern_re = ''.join([wrap_start, r'\A', prefix_body,
                              '.*', body or '', wrap_end])
    elif body is None:
        # no prefix and no pattern: matches every name
        if anchor:
            pattern_re = r'\A'
        else:
            pattern_re = ''
    elif anchor:                        # no prefix -- respect anchor flag
        pattern_re = ''.join([wrap_start, r'\A', body, wrap_end])
    else:
        pattern_re = ''.join([wrap_start, body, wrap_end])

    return re.compile(pattern_re)

# translate_pattern ()