blob: 06a8da9a0770ba2ea04c29a0b96d67df4a40e58b [file] [log] [blame]
Greg Wardadc11722000-07-30 00:04:17 +00001"""distutils.filelist
2
3Provides the FileList class, used for poking about the filesystem
4and building lists of files.
5"""
6
Greg Wardadc11722000-07-30 00:04:17 +00007__revision__ = "$Id$"
8
Neal Norwitz9d72bb42007-04-17 08:48:32 +00009import os, re
Greg Wardadc11722000-07-30 00:04:17 +000010import fnmatch
Greg Wardadc11722000-07-30 00:04:17 +000011from distutils.util import convert_path
Greg Ward7b3d56c2000-07-30 00:21:36 +000012from distutils.errors import DistutilsTemplateError, DistutilsInternalError
Jeremy Hylton4f2f1332002-06-04 21:04:03 +000013from distutils import log
Greg Wardadc11722000-07-30 00:04:17 +000014
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until it is supplied through
        'set_allfiles()' or computed by 'findall()'
    """

    def __init__(self, warn=None, debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set 'allfiles' to the result of the module-level 'findall(dir)'."""
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'files' by (directory, basename) pairs.

        Not a strict lexical sort: each name is split with os.path.split()
        before comparison and re-joined afterwards.
        """
        split_names = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*parts) for parts in split_names]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicate entries from 'files', in place.

        Assumes the list has been sorted, so that equal names are
        neighbours; non-adjacent duplicates are left alone.
        """
        # Walk backwards so deletions never shift an index still to visit.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is always set; which of the other three are set (the rest
        stay None) depends on the action.  Raise DistutilsTemplateError if
        the line is blank, the action is unknown, or the number of words
        does not fit the action.
        """
        words = line.split()
        if not words:
            # Previously a blank line escaped as a bare IndexError; report
            # it the same way as every other malformed template line.
            raise DistutilsTemplateError(
                "empty template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Apply one template line to 'files'.

        The line is parsed by '_parse_template_line()' (which raises
        DistutilsTemplateError for malformed input) and then dispatched to
        'include_pattern()' or 'exclude_pattern()'.  A warning is logged
        whenever a pattern matches nothing.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern, and append
        them to 'self.files'.  Patterns are not quite the same as
        implemented by the 'fnmatch' module: '*' and '?' do not match
        "special" characters (see 'glob_to_re()').

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so deleting an entry never disturbs the
        # indices that are still to be examined.
        for i in range(len(self.files)-1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found
241
Greg Wardadc11722000-07-30 00:04:17 +0000242
243# ----------------------------------------------------------------------
244# Utility functions
245
def findall(dir=os.curdir):
    """Find all files under 'dir' and return the list of full filenames
    (relative to 'dir').

    Only entries that os.stat() reports as regular files are returned.
    Because os.stat() follows symbolic links, links to files are included
    and links to directories are descended into.  Entries that cannot be
    stat'ed (for example dangling symlinks) are skipped instead of
    aborting the whole scan.
    """
    from stat import S_ISDIR, S_ISREG

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:        # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname).st_mode
            except OSError:
                # Dangling symlink, or the entry vanished between
                # listdir() and stat(): nothing there to list.
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                pending.append(fullname)
    return found
275
276
def _regex_sep():
    """Return os.sep in a form that is safe to embed in a regular expression."""
    if os.sep == '\\':
        # A lone backslash would escape whatever follows it in the regex.
        return r'\\'
    return os.sep


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match the path separator (os.sep).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match the path separator under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the separator.
    not_sep = '[^%s]' % _regex_sep()
    # A callable replacement is used so the class is inserted verbatim,
    # without a second round of backslash processing by re.sub().
    return re.sub(r'((?<!\\)(\\\\)*)\.',
                  lambda match: match.group(1) + not_sep,
                  pattern_re)


def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.

    If 'is_regex' is true, then 'pattern' is directly compiled to a regex
    (if it's a string) or just returned as-is (assumes it's a regex
    object); 'anchor' and 'prefix' are ignored.

    If 'prefix' is given, the regex matches names that start with 'prefix'
    (itself a glob), followed by the path separator, anything, and finally
    'pattern' (which may be empty/None); 'anchor' is ignored.  Otherwise
    the match is tied to the start of the name only when 'anchor' is true.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # fnmatch.translate() decorates its output (a bare suffix on old
    # Pythons, a "(?s:" ... ")\Z" wrapper on newer ones).  Translate a
    # dummy glob to learn what surrounds the body, so that fragments can
    # be combined without leaving an unbalanced group behind.
    start, _, end = glob_to_re('_').partition('_')

    def body_of(regex):
        return regex[len(start):len(regex) - len(end)]

    if prefix is not None:
        tail = body_of(glob_to_re(pattern)) if pattern else ''
        # Join with an escaped separator; os.path.join() would emit a raw
        # backslash on Windows and corrupt the regex.
        pattern_re = ''.join((start, r'\A', body_of(glob_to_re(prefix)),
                              _regex_sep(), '.*', tail, end))
    elif pattern:
        pattern_re = glob_to_re(pattern)
        if anchor:
            pattern_re = start + r'\A' + pattern_re[len(start):]
    else:
        # No glob at all: match every name (from the start if anchored).
        pattern_re = r'\A' if anchor else ''

    return re.compile(pattern_re)
Greg Ward071ed762000-09-26 02:12:31 +0000323 return re.compile(pattern_re)