blob: c92d5fdba393bb4bab0718ae7006d54eb80e1ec5 [file] [log] [blame]
"""distutils.filelist

Provides the FileList class, used for poking about the filesystem
and building lists of files.
"""
6
Neal Norwitz9d72bb42007-04-17 08:48:32 +00007import os, re
Greg Wardadc11722000-07-30 00:04:17 +00008import fnmatch
Jason R. Coombsedc4b2f2015-09-19 18:12:15 +02009import functools
Greg Wardadc11722000-07-30 00:04:17 +000010from distutils.util import convert_path
Greg Ward7b3d56c2000-07-30 00:21:36 +000011from distutils.errors import DistutilsTemplateError, DistutilsInternalError
Jeremy Hylton4f2f1332002-06-04 21:04:03 +000012from distutils import log
Greg Wardadc11722000-07-30 00:04:17 +000013
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until set_allfiles() or findall() is
        called (include_pattern() calls findall() on demand)
    """

    def __init__(self, warn=None, debug_print=None):
        # 'warn' and 'debug_print' are ignored; they are only accepted
        # for backwards compatibility.
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Fill 'self.allfiles' with the result of the module-level
        findall() function applied to 'dir'.
        """
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs.

        Not a strict lexical sort: names are compared after being split
        with os.path.split(), so "a/d" sorts before "a-b/c".
        """
        split_names = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*parts) for parts in split_names]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Drop adjacent duplicate entries from 'self.files' in place.

        Assumes the list has been sorted, so that equal names are
        neighbours.
        """
        # Walk backwards so deleting an entry never shifts an index we
        # have yet to visit.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is the first word of the line.  Which of the other three
        values are set (the rest stay None) depends on the action:
        'patterns' alone, 'dir' plus 'patterns', or 'dir_pattern' alone.
        All paths and patterns are passed through convert_path().

        Raises DistutilsTemplateError if the line is blank, names an
        unknown action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # A blank line used to escape as a bare IndexError from
            # words[0]; report it like every other malformed line.
            raise DistutilsTemplateError(
                "empty template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Apply one template line to this file list.

        The line is parsed by _parse_template_line() (which raises
        DistutilsTemplateError for malformed lines) and the resulting
        action is carried out with include_pattern()/exclude_pattern().
        A warning is logged whenever a pattern matches nothing.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                # Carries the same "warning: " prefix as every other
                # message logged by this method.
                log.warn(("warning: no previously-included directories "
                          "found matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?' do not match the path separator (os.sep).

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so that deletions do not disturb the
        # indices still to be examined.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found
241
Greg Wardadc11722000-07-30 00:04:17 +0000242
# ----------------------------------------------------------------------
# Utility functions
245
def _find_all_simple(path):
    """
    Lazily yield every regular file found below 'path'.

    The tree is walked with os.walk(), following symbolic links to
    directories; entries for which os.path.isfile() is false are skipped.
    """
    def _candidates():
        for parent, _subdirs, names in os.walk(path, followlinks=True):
            for name in names:
                yield os.path.join(parent, name)

    return filter(os.path.isfile, _candidates())
Greg Wardadc11722000-07-30 00:04:17 +0000256
Greg Wardadc11722000-07-30 00:04:17 +0000257
def findall(dir=os.curdir):
    """
    Return a list of all files found under 'dir'.

    When 'dir' is the current directory (os.curdir) the names are made
    relative to it, so they carry no leading "./"; for any other 'dir'
    the names are returned with 'dir' prepended.
    """
    found = _find_all_simple(dir)
    if dir != os.curdir:
        return list(found)
    return [os.path.relpath(name, start=dir) for name in found]
Greg Wardadc11722000-07-30 00:04:17 +0000268
269
def glob_to_re(pattern):
    """Convert the shell-style glob 'pattern' into a regular expression
    and return it as a string.

    The result is that of 'fnmatch.translate()', except that the glob
    wildcards '?' and '*' are not allowed to match the path separator
    (os.sep).
    """
    translated = fnmatch.translate(pattern)

    # fnmatch renders '?' and '*' as '.' and '.*', which would also match
    # the path separator.  Rewrite every unescaped dot (one preceded by an
    # even number of backslashes) as "any character except os.sep".
    # NOTE: the substitution is purely textual, so an unescaped dot inside
    # a bracket expression is rewritten as well.
    if os.sep == '\\':
        # The separator ends up inside a regex that is itself produced by
        # a regex replacement template, so the backslash is escaped twice.
        sep_class = r'\\\\'
    else:
        sep_class = os.sep
    replacement = r'\1[^%s]' % sep_class
    return re.sub(r'((?<!\\)(\\\\)*)\.', replacement, translated)
291
Greg Wardadc11722000-07-30 00:04:17 +0000292
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression and return the compiled regex.

    If 'is_regex' is true, 'pattern' is compiled directly as a regex (if
    it is a string) or returned as-is (assumed to be a regex object);
    'anchor' and 'prefix' are ignored.

    Otherwise 'pattern' is translated with glob_to_re().  If 'prefix' is
    given, the regex matches names that start with 'prefix' (itself a
    glob), followed by the path separator, anything, and then 'pattern';
    'anchor' is ignored.  Without a prefix, a true 'anchor' pins the
    match to the start of the name.

    An empty or None 'pattern' places no constraint on the name: with a
    prefix, anything under the prefix matches; without one, every name
    matches regardless of 'anchor'.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # glob_to_re() surrounds every translated glob with the same leading
    # and trailing text; recover both by translating a one-character glob
    # and splitting around that character.
    start, _, end = glob_to_re('_').partition('_')

    if pattern:
        pattern_re = glob_to_re(pattern)
        assert pattern_re.startswith(start) and pattern_re.endswith(end)
    else:
        pattern_re = ''

    if prefix is not None:
        prefix_re = glob_to_re(prefix)
        assert prefix_re.startswith(start) and prefix_re.endswith(end)
        prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
        sep = os.sep
        if os.sep == '\\':
            # a literal backslash must be escaped inside the regex
            sep = r'\\'
        pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_re, sep, pattern_re, end)
    elif anchor and pattern_re:
        # no prefix -- respect anchor flag.  An empty pattern is left
        # alone: it has no start/end wrapper, so anchoring it would emit
        # 'start' without its matching 'end' and could fail to compile.
        pattern_re = r'%s\A%s' % (start, pattern_re[len(start):])

    return re.compile(pattern_re)