| #!/usr/bin/env python3 |
| |
| """\ |
| List python source files. |
| |
| There are three functions to check whether a file is a Python source, listed |
| here with increasing complexity: |
| |
| - has_python_ext() checks whether a file name ends in '.py[w]'. |
| - look_like_python() checks whether the file is not binary and either has |
| the '.py[w]' extension or the first line contains the word 'python'. |
| - can_be_compiled() checks whether the file can be compiled by compile(). |
| |
| The file also must be of appropriate size - not bigger than a megabyte. |
| |
| walk_python_files() recursively lists all Python files under the given directories. |
| """ |
| __author__ = "Oleg Broytmann, Georg Brandl" |
| |
| __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"] |
| |
| |
| import os, re |
| |
| binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]') |
| |
| debug = False |
| |
| def print_debug(msg): |
| if debug: print(msg) |
| |
| |
| def _open(fullpath): |
| try: |
| size = os.stat(fullpath).st_size |
| except OSError as err: # Permission denied - ignore the file |
| print_debug("%s: permission denied: %s" % (fullpath, err)) |
| return None |
| |
| if size > 1024*1024: # too big |
| print_debug("%s: the file is too big: %d bytes" % (fullpath, size)) |
| return None |
| |
| try: |
| return open(fullpath, 'rU') |
| except IOError as err: # Access denied, or a special file - ignore it |
| print_debug("%s: access denied: %s" % (fullpath, err)) |
| return None |
| |
| def has_python_ext(fullpath): |
| return fullpath.endswith(".py") or fullpath.endswith(".pyw") |
| |
| def looks_like_python(fullpath): |
| infile = _open(fullpath) |
| if infile is None: |
| return False |
| |
| line = infile.readline() |
| infile.close() |
| |
| if binary_re.search(line): |
| # file appears to be binary |
| print_debug("%s: appears to be binary" % fullpath) |
| return False |
| |
| if fullpath.endswith(".py") or fullpath.endswith(".pyw"): |
| return True |
| elif "python" in line: |
| # disguised Python script (e.g. CGI) |
| return True |
| |
| return False |
| |
| def can_be_compiled(fullpath): |
| infile = _open(fullpath) |
| if infile is None: |
| return False |
| |
| code = infile.read() |
| infile.close() |
| |
| try: |
| compile(code, fullpath, "exec") |
| except Exception as err: |
| print_debug("%s: cannot compile: %s" % (fullpath, err)) |
| return False |
| |
| return True |
| |
| |
| def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None): |
| """\ |
| Recursively yield all Python source files below the given paths. |
| |
| paths: a list of files and/or directories to be checked. |
| is_python: a function that takes a file name and checks whether it is a |
| Python source file |
| exclude_dirs: a list of directory base names that should be excluded in |
| the search |
| """ |
| if exclude_dirs is None: |
| exclude_dirs=[] |
| |
| for path in paths: |
| print_debug("testing: %s" % path) |
| if os.path.isfile(path): |
| if is_python(path): |
| yield path |
| elif os.path.isdir(path): |
| print_debug(" it is a directory") |
| for dirpath, dirnames, filenames in os.walk(path): |
| for exclude in exclude_dirs: |
| if exclude in dirnames: |
| dirnames.remove(exclude) |
| for filename in filenames: |
| fullpath = os.path.join(dirpath, filename) |
| print_debug("testing: %s" % fullpath) |
| if is_python(fullpath): |
| yield fullpath |
| else: |
| print_debug(" unknown type") |
| |
| |
| if __name__ == "__main__": |
| # Two simple examples/tests |
| for fullpath in walk_python_files(['.']): |
| print(fullpath) |
| print("----------") |
| for fullpath in walk_python_files(['.'], is_python=can_be_compiled): |
| print(fullpath) |