Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 1 | """distutils.filelist |
| 2 | |
| 3 | Provides the FileList class, used for poking about the filesystem |
| 4 | and building lists of files. |
| 5 | """ |
| 6 | |
Neal Norwitz | 9d72bb4 | 2007-04-17 08:48:32 +0000 | [diff] [blame] | 7 | import os, re |
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 8 | import fnmatch |
Jason R. Coombs | edc4b2f | 2015-09-19 18:12:15 +0200 | [diff] [blame] | 9 | import functools |
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 10 | from distutils.util import convert_path |
Greg Ward | 7b3d56c | 2000-07-30 00:21:36 +0000 | [diff] [blame] | 11 | from distutils.errors import DistutilsTemplateError, DistutilsInternalError |
Jeremy Hylton | 4f2f133 | 2002-06-04 21:04:03 +0000 | [diff] [blame] | 12 | from distutils import log |
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 13 | |
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until set_allfiles() or findall() has
        been called (include_pattern() calls findall() on demand)
    """

    def __init__(self, warn=None, debug_print=None):
        # ignore argument to FileList, but keep them for backwards
        # compatibility
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate filenames."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Populate 'self.allfiles' with the module-level findall(dir)."""
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Append a single filename to 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Append every filename in 'items' to 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory, basename) pairs.

        'self.files' is rebound to a new list whose entries have been
        split with os.path.split() and re-joined with os.path.join().
        """
        # Not a strict lexical sort!
        sortable_files = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*sort_tuple)
                      for sort_tuple in sortable_files]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Remove adjacent duplicate entries from 'self.files' in place."""
        # Assumes list has been sorted!
        # Walk backwards so deleting an entry never shifts the indices
        # that are still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is the first word of the line.  Which of the other three
        values are not None depends on the action: 'patterns' alone,
        'dir' plus 'patterns', or 'dir_pattern' alone.

        Raise DistutilsTemplateError if the line is blank, names an unknown
        action, or has the wrong number of words for its action.
        """
        words = line.split()
        if not words:
            # A blank line used to escape as a bare IndexError from
            # words[0]; report it like any other malformed line instead.
            raise DistutilsTemplateError(
                "blank template line: expected an action")
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(w) for w in words[1:]]
        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            dir = convert_path(words[1])
            patterns = [convert_path(w) for w in words[2:]]
        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                      "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])
        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    def process_template_line(self, line):
        """Parse one template line and apply its action to 'self.files'.

        A warning is logged whenever a pattern matches nothing.  Raise
        DistutilsTemplateError if the line cannot be parsed.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, dir)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, dir)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("no previously-included directories found "
                          "matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern, anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?' do not match the path separator (os.sep).

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        If 'self.allfiles' is still None it is first filled in by calling
        findall().  Selected strings will be added to self.files.

        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = True
        return files_found

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return True if files are found, False otherwise.
        """
        files_found = False
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so deletions do not disturb the indices
        # that remain to be examined.
        for i in range(len(self.files)-1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = True
        return files_found
| 241 | |
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 242 | |
| 243 | # ---------------------------------------------------------------------- |
| 244 | # Utility functions |
| 245 | |
def _find_all_simple(path):
    """
    Lazily yield every regular file found beneath 'path'.

    Directory symlinks are followed during the walk, and entries for
    which os.path.isfile() is false are dropped.
    """
    walker = os.walk(path, followlinks=True)
    candidates = (
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in walker
        for filename in filenames
    )
    return filter(os.path.isfile, candidates)
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 256 | |
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 257 | |
def findall(dir=os.curdir):
    """
    Return a list of all files found under 'dir'.

    When 'dir' is os.curdir the names are made relative to it;
    for any other 'dir' the names keep 'dir' as their leading component.
    """
    found = _find_all_simple(dir)
    if dir != os.curdir:
        return list(found)
    # Strip the leading "./" style prefix for the current directory.
    return [os.path.relpath(name, start=dir) for name in found]
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 268 | |
| 269 | |
def glob_to_re(pattern):
    """Convert a shell-style glob into regular-expression source text.

    The result is what 'fnmatch.translate()' produces, except that every
    unescaped '.' in it is rewritten so that it cannot match the path
    separator (os.sep).  Return the regex as a string, not compiled.
    """
    translated = fnmatch.translate(pattern)

    # fnmatch turns '?' and '*' into '.' and '.*', which would happily
    # match a path separator.  Swap each unescaped dot for a character
    # class that excludes os.sep.
    if os.sep == '\\':
        # The separator passes through a regex replacement template and
        # then lands inside a regex, so the backslash is escaped twice.
        sep_in_class = r'\\\\'
    else:
        sep_in_class = os.sep
    replacement = r'\1[^%s]' % sep_in_class
    # A dot preceded by an even number (possibly zero) of backslashes.
    unescaped_dot = r'((?<!\\)(\\\\)*)\.'
    return re.sub(unescaped_dot, replacement, translated)
| 291 | |
Greg Ward | adc1172 | 2000-07-30 00:04:17 +0000 | [diff] [blame] | 292 | |
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Build a compiled regular expression from a shell-like wildcard
    pattern and return it.

    When 'is_regex' is true no translation happens: a string 'pattern'
    is compiled directly and anything else is returned unchanged (it is
    assumed to already be a regex object).  Otherwise, a supplied
    'prefix' (itself a glob) must match the start of the name, followed
    by the path separator, anything, and then 'pattern'; with no prefix,
    a true 'anchor' pins the match to the start of the string.
    """
    if is_regex:
        return re.compile(pattern) if isinstance(pattern, str) else pattern

    # Translate a one-character glob to learn the wrapper text that
    # glob_to_re() puts before and after every translated pattern.
    start, _, end = glob_to_re('_').partition('_')

    if pattern:
        body_re = glob_to_re(pattern)
        assert body_re.startswith(start) and body_re.endswith(end)
    else:
        body_re = ''

    if prefix is None:
        # no prefix -- respect anchor flag
        if anchor:
            body_re = r'%s\A%s' % (start, body_re[len(start):])
        return re.compile(body_re)

    prefix_re = glob_to_re(prefix)
    assert prefix_re.startswith(start) and prefix_re.endswith(end)
    # Strip the wrappers so the two pieces can be spliced into one regex.
    prefix_core = prefix_re[len(start): len(prefix_re) - len(end)]
    sep = r'\\' if os.sep == '\\' else os.sep
    body_core = body_re[len(start): len(body_re) - len(end)]
    combined = r'%s\A%s%s.*%s%s' % (start, prefix_core, sep, body_core, end)
    return re.compile(combined)