blob: 69e8e0df4ff0802fc31bfb829fdc83bb4907b92e [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Georg Brandl56897312005-08-24 18:32:30 +00002
3"""\
List Python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

For the latter two checks the file must also be of appropriate size - not
bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
17"""
Thomas Wouters89f507f2006-12-13 04:49:30 +000018__author__ = "Oleg Broytmann, Georg Brandl"
Georg Brandl56897312005-08-24 18:32:30 +000019
20__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
21
22
Christian Heimes05e8be12008-02-23 18:30:17 +000023import os, re
Georg Brandl56897312005-08-24 18:32:30 +000024
# When true, print_debug() writes its diagnostics to standard output.
debug = False

# Matches a control byte (NUL..BS, SO..US, or DEL); tab, line feed, vertical
# tab, form feed and carriage return are deliberately not matched.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')
28
def print_debug(msg):
    """Print *msg* when the module-level ``debug`` flag is true; otherwise do nothing."""
    if not debug:
        return
    print(msg)
Georg Brandl56897312005-08-24 18:32:30 +000031
32
def _open(fullpath):
    """Open *fullpath* for binary reading if it is a suitable candidate.

    Returns a file object opened in "rb" mode, or None when the path
    cannot be stat()ed, is not a regular file, is larger than
    1024*1024 bytes, or cannot be opened.  Every rejection is reported
    through print_debug().
    """
    import stat  # local import: only this helper needs it

    try:
        info = os.stat(fullpath)
    except OSError as err: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    # FIFOs, sockets and device nodes usually report a size of 0, so they
    # would slip past the size check below; opening or reading them can
    # block or never reach end-of-file.  Only regular files are candidates.
    if not stat.S_ISREG(info.st_mode):
        print_debug("%s: not a regular file" % fullpath)
        return None

    size = info.st_size
    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, "rb")
    except OSError as err: # Access denied - ignore the file
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None
49
def has_python_ext(fullpath):
    """Return True if *fullpath* ends in '.py' or '.pyw', else False."""
    return fullpath.endswith((".py", ".pyw"))
52
def looks_like_python(fullpath):
    """Heuristically decide whether *fullpath* is a Python source file.

    The file is opened through _open(); if that yields None the answer is
    False.  Only the first line is read.  If it contains a byte matched by
    binary_re the file is treated as binary and rejected.  Otherwise the
    file qualifies when its name ends in '.py' or '.pyw', or when the
    first line contains the bytes b"python".
    """
    handle = _open(fullpath)
    if handle is None:
        return False

    with handle:
        first_line = handle.readline()

    if binary_re.search(first_line) is not None:
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Either a recognised extension, or a disguised Python script
    # (e.g. CGI) whose first line mentions python.
    return fullpath.endswith((".py", ".pyw")) or b"python" in first_line
73
def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile in "exec" mode.

    The file is opened through _open(); if that yields None the answer is
    False.  Any Exception raised by compile() is reported through
    print_debug() and also results in False.
    """
    source_file = _open(fullpath)
    if source_file is None:
        return False

    with source_file:
        source = source_file.read()

    try:
        compile(source, fullpath, "exec")
    except Exception as err:
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False
    else:
        return True
89
90
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and returns a true value
               when the file should be yielded
    exclude_dirs: a list of directory base names that are pruned from the
                  directory walk (None means nothing is excluded)
    """
    excluded = [] if exclude_dirs is None else exclude_dirs

    for path in paths:
        print_debug("testing: %s" % path)

        if os.path.isfile(path):
            if is_python(path):
                yield path
            continue

        if not os.path.isdir(path):
            print_debug(" unknown type")
            continue

        print_debug(" it is a directory")
        for dirpath, dirnames, filenames in os.walk(path):
            # Removing entries from dirnames in place stops os.walk from
            # descending into those directories.
            for unwanted in excluded:
                try:
                    dirnames.remove(unwanted)
                except ValueError:
                    pass
            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
122
123
if __name__ == "__main__":
    # Demo 1: list files under the current directory using the default check.
    for found in walk_python_files(['.']):
        print(found)

    print("----------")

    # Demo 2: same walk, but keep only files that compile.
    for found in walk_python_files(['.'], is_python=can_be_compiled):
        print(found)