blob: 5eee174cc6eeecf8a20c94742269e50573c7dd00 [file] [log] [blame]
Tarek Ziade1231a4e2011-05-19 13:07:25 +02001"""Class representing the list of files in a distribution.
2
3The Manifest class can be used to:
4
5 - read or write a MANIFEST file
6 - read a template file and find out the file list
7"""
8# XXX todo: document + add tests
9import re
10import os
11import fnmatch
12
13from packaging import logger
14from packaging.util import write_file, convert_path
15from packaging.errors import (PackagingTemplateError,
16 PackagingInternalError)
17
18__all__ = ['Manifest']
19
20# a \ followed by some spaces + EOL
21_COLLAPSE_PATTERN = re.compile('\\\w*\n', re.M)
22_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
23
24
class Manifest(object):
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Two attributes hold the state:

     - 'allfiles': the names returned by the last findall() call, or None
       as long as no scan has been done
     - 'files': the names selected so far
    """

    def __init__(self):
        self.allfiles = None
        self.files = []

    #
    # Public API
    #

    def findall(self, dir=os.curdir):
        """Scan 'dir' with _findall() and store the result in
        'self.allfiles'.
        """
        self.allfiles = _findall(dir)

    def append(self, item):
        """Add a single name to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every name of the iterable 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs.

        'self.files' is rebound to a new list.
        """
        # Not a strict lexical sort!
        split_names = sorted(os.path.split(path) for path in self.files)
        self.files = [os.path.join(*parts) for parts in split_names]

    def clear(self):
        """Clear all collected files."""
        self.files = []
        # keep None as the "never scanned" marker so that the next
        # _include_pattern() call still triggers a scan
        if self.allfiles is not None:
            self.allfiles = []

    def remove_duplicates(self):
        """Drop every name equal to the one just before it in 'self.files'.

        Only adjacent duplicates are removed, so the list is expected to be
        sorted first.  The list object is modified in place.
        """
        names = self.files
        # slice assignment keeps the same list object, like the element-wise
        # deletion it replaces, but in a single pass
        names[:] = [name for index, name in enumerate(names)
                    if index == 0 or name != names[index - 1]]

    def read_template(self, path_or_file):
        """Read and parse a manifest template file.

        'path_or_file' can be a path (a str, which is opened for reading) or
        a file-like object.  In both cases the file is closed once its
        content has been read.

        Continued lines are joined, comments and empty lines are dropped,
        and each remaining line is applied to the list in order.  A line
        that raises PackagingTemplateError is reported with a warning and
        skipped; the following lines are still processed.
        """
        if isinstance(path_or_file, str):
            f = open(path_or_file)
        else:
            f = path_or_file

        try:
            content = f.read()
        finally:
            f.close()

        # first, let's unwrap collapsed lines
        content = _COLLAPSE_PATTERN.sub('', content)
        # next, let's remove commented lines and empty lines
        content = _COMMENTED_LINE.sub('', content)

        # now we have our cleaned up lines
        for line in content.split('\n'):
            line = line.strip()
            if line == '':
                continue
            try:
                self._process_template_line(line)
            except PackagingTemplateError as msg:
                logger.warning("%s, %s", path_or_file, msg)

    def write(self, path):
        """Write the names in 'self.files' to the manifest file 'path'.

        If 'path' already exists and its first line is not the "GENERATED"
        marker, the file is considered manually maintained: this is logged
        and nothing is written.  Otherwise 'self.files' is sorted and
        de-duplicated (the attribute itself is updated), and the marker line
        followed by the names is handed to write_file().
        """
        if os.path.isfile(path):
            with open(path) as fp:
                first_line = fp.readline()

            if first_line != '# file GENERATED by packaging, do NOT edit\n':
                logger.info("not writing to manually maintained "
                            "manifest file %r", path)
                return

        self.sort()
        self.remove_duplicates()
        content = self.files[:]
        content.insert(0, '# file GENERATED by packaging, do NOT edit')
        logger.info("writing manifest file %r", path)
        write_file(path, content)

    def read(self, path):
        """Read the manifest file 'path' and append the names it lists to
        'self.files'.

        Surrounding whitespace (including the end of line) is stripped from
        every line; empty lines and lines starting with '#' -- such as the
        marker line emitted by write() -- are ignored.
        """
        logger.info("reading manifest file %r", path)
        with open(path) as manifest:
            for line in manifest:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                self.append(line)

    def exclude_pattern(self, pattern, anchor=True, prefix=None,
                        is_regex=False):
        """Remove strings (presumably filenames) from 'self.files' that
        match 'pattern'.

        The parameters have the same meaning as for '_include_pattern()'.
        The list 'self.files' is modified in place.  Return True if at
        least one name was removed.
        """
        pattern_re = _translate_pattern(pattern, anchor, prefix, is_regex)
        kept = [name for name in self.files if not pattern_re.search(name)]
        files_found = len(kept) != len(self.files)
        # slice assignment: callers holding a reference to the list see
        # the removal too
        self.files[:] = kept
        return files_found

    #
    # Private API
    #

    def _parse_template_line(self, line):
        """Split a template line into its components.

        Return a tuple (action, patterns, dir, dir_pattern).  'action' is
        always set; of the three others, only those relevant to the action
        are set and the rest are None.  A line made of a single word that is
        not an action name is treated as "include <word>".

        Raise PackagingTemplateError if the action is unknown or if the
        number of words does not fit the action.
        """
        words = line.split()
        if len(words) == 1 and words[0] not in (
                'include', 'exclude', 'global-include', 'global-exclude',
                'recursive-include', 'recursive-exclude', 'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise PackagingTemplateError(
                    "%r expects <pattern1> <pattern2> ..." % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise PackagingTemplateError(
                    "%r expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise PackagingTemplateError(
                    "%r expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise PackagingTemplateError("unknown action %r" % action)

        return action, patterns, dir, dir_pattern

    def _process_template_line(self, line):
        """Apply one template line to 'self.files'.

        A warning is logged for every pattern that selects or removes
        nothing.  PackagingTemplateError raised while parsing the line is
        propagated to the caller.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        action, patterns, dir, dir_pattern = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning("no files found matching %r", pattern)

        elif action == 'exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=True):
                    logger.warning("no previously-included files "
                                   "found matching %r", pattern)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning("no files found matching %r "
                                   "anywhere in distribution", pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=False):
                    logger.warning("no previously-included files "
                                   "matching %r found anywhere in "
                                   "distribution", pattern)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=dir):
                    logger.warning("no files found matching %r "
                                   "under directory %r", pattern, dir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    logger.warning("no previously-included files "
                                   "matching %r found under directory %r",
                                   pattern, dir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dir_pattern):
                logger.warning("no directories found matching %r",
                               dir_pattern)

        elif action == 'prune':
            if not self.exclude_pattern(None, prefix=dir_pattern):
                logger.warning("no previously-included directories found "
                               "matching %r", dir_pattern)
        else:
            raise PackagingInternalError(
                "this cannot happen: invalid action %r" % action)

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        The pattern is turned into a regex by _translate_pattern(), which
        receives 'pattern', 'anchor', 'prefix' and 'is_regex' unchanged.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in
        between them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        If no scan has been done yet ('self.allfiles' is None), findall()
        is called first with its default directory.

        Selected strings are appended to 'self.files'.

        Return True if files are found.
        """
        pattern_re = _translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        selected = [name for name in self.allfiles if pattern_re.search(name)]
        self.files.extend(selected)
        return bool(selected)
288
289
290#
291# Utility functions
292#
def _findall(dir=os.curdir):
    """Find all regular files under 'dir' and return the list of their
    names.

    Directories are walked recursively.  Every name is prefixed with the
    directory it was found in, except for entries found directly in
    os.curdir, which are returned bare.  Because os.stat() follows symbolic
    links, a link to a regular file is reported and a link to a directory is
    walked.  Entries that cannot be stat'ed (for instance a dangling
    symbolic link) are skipped.  The order of the result is unspecified.
    """
    from stat import S_ISDIR, S_ISREG

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:        # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname).st_mode
            except OSError:
                # e.g. a dangling symlink, or an entry removed since the
                # listdir() call: it is not a usable file, so leave it out
                # instead of aborting the whole scan
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                pending.append(fullname)

    return found
323
324
def _glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression.

    Return a string containing the regex.  Differs from
    'fnmatch.translate()' in that '?' and '*' do not match the path
    separator (os.sep).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match the path separator under any
    # OS.  So change all non-escaped dots in the RE to match any character
    # except the separator.
    sep = os.sep
    if sep == '\\':
        # the separator goes through a replacement template and then into a
        # character class, so the backslash has to be escaped twice
        sep = r'\\\\'
    return re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^%s]' % sep, pattern_re)


def _translate_pattern(pattern, anchor=True, prefix=None, is_regex=False):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.

    Return the compiled regex.  If 'is_regex' is true, then 'pattern' is
    directly compiled to a regex (if it's a string) or just returned as-is
    (assumes it's a regex object); 'anchor' and 'prefix' are ignored.

    Otherwise, if 'prefix' is given, the regex matches names made of
    'prefix' (itself a glob pattern), the path separator, anything, and
    finally 'pattern' (nothing if 'pattern' is empty or None); 'anchor' is
    ignored.  Without 'prefix', the regex is anchored at the start of the
    name only if 'anchor' is true.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    if pattern:
        pattern_re = _glob_to_re(pattern)
    else:
        pattern_re = ''

    if prefix is not None:
        # fnmatch.translate() may put text both before and after the
        # translated pattern (e.g. '(?s:' and ')\Z'), and what exactly
        # depends on the Python version.  Translate a placeholder to find
        # out, so that prefix and pattern can be merged inside one wrapper.
        start, _, end = _glob_to_re('_').partition('_')

        prefix_re = _glob_to_re(prefix)
        prefix_re = prefix_re[len(start):len(prefix_re) - len(end)]
        if pattern_re:
            pattern_re = pattern_re[len(start):len(pattern_re) - len(end)]

        sep = os.sep
        if sep == '\\':
            # a literal backslash has to be escaped inside a regex
            sep = r'\\'
        pattern_re = "^" + start + prefix_re + sep + ".*" + pattern_re + end
    elif anchor:
        # no prefix -- respect anchor flag
        pattern_re = "^" + pattern_re

    return re.compile(pattern_re)
374 return re.compile(pattern_re)