blob: a3798530a5825ba5dc76091e0f61517068bb7936 [file] [log] [blame]
"""Class representing the list of files in a distribution.

The Manifest class can be used to:

 - read or write a MANIFEST file
 - read a template file and find out the file list
"""
8# XXX todo: document + add tests
9import re
10import os
11import fnmatch
12
13from packaging import logger
14from packaging.util import write_file, convert_path
15from packaging.errors import (PackagingTemplateError,
16 PackagingInternalError)
17
18__all__ = ['Manifest']
19
# A line continuation: a backslash, optional trailing blanks (spaces or
# tabs), then the end of the line.  Substituting '' joins the two lines.
_COLLAPSE_PATTERN = re.compile(r'\\[ \t]*\n')
# Either a comment (from '#' up to, but not including, the end of its line;
# this also covers a comment on a last line that has no trailing newline),
# or a newline that is directly followed by another end of line.
_COMMENTED_LINE = re.compile(r'#[^\n]*|\n(?=$)', re.M)
23
24
class Manifest(object):
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Attributes:
      allfiles -- list of every file found by findall(); None until the
                  filesystem has been scanned
      files    -- list of the files selected so far
    """

    def __init__(self):
        self.allfiles = None
        self.files = []

    #
    # Public API
    #

    def findall(self, dir=os.curdir):
        """Scan 'dir' recursively and store the result in 'self.allfiles'."""
        self.allfiles = _findall(dir)

    def append(self, item):
        """Add a single file name to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add several file names to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, base name)."""
        # Not a strict lexical sort!  Paths are compared as the
        # (head, tail) pairs returned by os.path.split().
        self.files = [os.path.join(*path_tuple) for path_tuple in
                      sorted(os.path.split(path) for path in self.files)]

    def clear(self):
        """Clear all collected files."""
        self.files = []
        # An unscanned manifest (allfiles is None) stays unscanned.
        if self.allfiles is not None:
            self.allfiles = []

    def remove_duplicates(self):
        """Remove adjacent duplicates from 'self.files', in place."""
        # Assumes list has been sorted!
        # Walk backwards so deletions do not shift unvisited indexes.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    def read_template(self, path_or_file):
        """Read and parse a manifest template file.

        'path_or_file' can be a path (str) or a file-like object; in both
        cases the file object is closed once its content has been read.

        Updates the list accordingly.  A line that raises
        PackagingTemplateError is reported with a warning and skipped.
        """
        if isinstance(path_or_file, str):
            f = open(path_or_file)
        else:
            f = path_or_file

        try:
            content = f.read()
        finally:
            f.close()

        # first, let's unwrap collapsed lines
        content = _COLLAPSE_PATTERN.sub('', content)
        # next, let's remove commented lines and empty lines
        content = _COMMENTED_LINE.sub('', content)

        # now we have our cleaned up lines
        for line in content.split('\n'):
            line = line.strip()
            if line == '':
                continue
            try:
                self._process_template_line(line)
            except PackagingTemplateError as msg:
                logger.warning("%s, %s", path_or_file, msg)

    def write(self, path):
        """Write the sorted, de-duplicated file list to the manifest file
        'path', preceded by a "GENERATED" header line.

        If 'path' already exists and does not start with that header, it is
        considered manually maintained and is left untouched.
        """
        header = '# file GENERATED by packaging, do NOT edit'

        if os.path.isfile(path):
            with open(path) as fp:
                first_line = fp.readline()

            if first_line != header + '\n':
                logger.info("not writing to manually maintained "
                            "manifest file %r", path)
                return

        self.sort()
        self.remove_duplicates()
        # Work on a copy so the header never ends up in self.files.
        content = self.files[:]
        content.insert(0, header)
        logger.info("writing manifest file %r", path)
        write_file(path, content)

    def read(self, path):
        """Read the manifest file 'path' and append the file names it lists
        to 'self.files'.

        Surrounding whitespace (including the end of line) is stripped from
        every entry; blank lines and comment lines starting with '#' (such
        as the header emitted by write()) are ignored.
        """
        logger.info("reading manifest file %r", path)
        with open(path) as manifest:
            for line in manifest:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                self.append(line)

    def exclude_pattern(self, pattern, anchor=True, prefix=None,
                        is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for '_include_pattern()'.
        The list 'self.files' is modified in place. Return True if files are
        found.
        """
        pattern_re = _translate_pattern(pattern, anchor, prefix, is_regex)
        kept = [name for name in self.files if not pattern_re.search(name)]
        files_found = len(kept) != len(self.files)
        # Slice assignment keeps the same list object (in-place update).
        self.files[:] = kept
        return files_found

    #
    # Private API
    #

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is the first word of the line ('include' is assumed when
        the line holds a single word).  Of the three other values only the
        ones relevant to the action are set, the rest are None.

        Raise PackagingTemplateError if the action is unknown or has the
        wrong number of arguments.
        """
        words = line.split()
        if len(words) == 1:
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise PackagingTemplateError(
                    "%r expects <pattern1> <pattern2> ..." % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise PackagingTemplateError(
                    "%r expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise PackagingTemplateError(
                    "%r expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise PackagingTemplateError("unknown action %r" % action)

        return action, patterns, dir, dir_pattern

    def _process_template_line(self, line):
        """Apply one template line to 'self.files'.

        A warning is logged for every pattern that matches nothing.
        PackagingTemplateError raised while parsing the line propagates to
        the caller.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        action, patterns, dir, dir_pattern = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning("no files found matching %r", pattern)

        elif action == 'exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=True):
                    logger.warning("no previously-included files "
                                   "found matching %r", pattern)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning("no files found matching %r "
                                   "anywhere in distribution", pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=False):
                    logger.warning("no previously-included files "
                                   "matching %r found anywhere in "
                                   "distribution", pattern)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=dir):
                    logger.warning("no files found matching %r "
                                   "under directory %r", pattern, dir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    logger.warning("no previously-included files "
                                   "matching %r found under directory %r",
                                   pattern, dir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dir_pattern):
                logger.warning("no directories found matching %r",
                               dir_pattern)

        elif action == 'prune':
            if not self.exclude_pattern(None, prefix=dir_pattern):
                logger.warning("no previously-included directories found "
                               "matching %r", dir_pattern)
        else:
            raise PackagingInternalError(
                "this cannot happen: invalid action %r" % action)

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: '*' and '?' do not match the directory separator.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found.
        """
        pattern_re = _translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        matches = [name for name in self.allfiles if pattern_re.search(name)]
        self.files.extend(matches)
        return bool(matches)
286
287
288#
289# Utility functions
290#
def _findall(dir=os.curdir):
    """Find all files under 'dir' and return the list of full filenames
    (relative to 'dir').

    Names found directly in os.curdir are returned without a leading "./".
    Symbolic links are followed (os.stat() resolves them), so a link to a
    regular file is listed and a link to a directory is traversed.  Entries
    that cannot be stat'ed because they do not exist any more (dangling
    symlinks, files removed during the scan) are skipped.
    """
    from stat import S_ISREG, S_ISDIR

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname).st_mode
            except FileNotFoundError:
                # Dangling symlink or vanished entry: nothing to collect.
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                pending.append(fullname)

    return found
321
322
def _glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex.  Differs from
    'fnmatch.translate()' in that '*' and '?' do not match the directory
    separator (os.sep).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the directory separator.
    sep = os.sep
    if sep == '\\':
        # The separator goes through a substitution template and then into
        # a character class, so the backslash has to be escaped twice.
        sep = r'\\\\'
    return re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^%s]' % sep, pattern_re)


def _translate_pattern(pattern, anchor=True, prefix=None, is_regex=False):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.

    Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise 'pattern' (a glob, or an empty/None value meaning "anything")
    is translated.  With 'prefix' (itself a glob) the regex matches names
    made of the prefix, a directory separator, anything, and finally
    'pattern'; 'anchor' is ignored.  Without 'prefix', a true 'anchor' ties
    the match to the beginning of the name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # fnmatch.translate() wraps the translated glob in a version-dependent
    # prefix/suffix (for instance '(?s:' ... ')\Z').  Find out what they are
    # with a neutral one-character pattern, so that additions are made
    # inside the wrapper instead of by slicing a guessed length off the end.
    start, _, end = _glob_to_re('_').partition('_')

    def unwrap(regex):
        return regex[len(start):len(regex) - len(end)]

    if prefix is not None:
        prefix_body = unwrap(_glob_to_re(prefix))
        pattern_body = unwrap(_glob_to_re(pattern)) if pattern else ''
        sep = os.sep
        if sep == '\\':
            # a literal backslash has to be escaped inside a regex
            sep = r'\\'
        pattern_re = r'%s\A%s%s.*%s%s' % (start, prefix_body, sep,
                                          pattern_body, end)
    elif not pattern:
        # No glob at all: match every name (anchored or not).
        pattern_re = r'\A' if anchor else ''
    else:
        pattern_re = _glob_to_re(pattern)
        if anchor:      # no prefix -- respect anchor flag
            pattern_re = r'%s\A%s%s' % (start, unwrap(pattern_re), end)

    return re.compile(pattern_re)
372 return re.compile(pattern_re)