blob: 5aa1febade8dd8551fd2e32f388f04f746b9f0e9 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Georg Brandl56897312005-08-24 18:32:30 +00002
3"""List all those Python files that require a coding directive
4
Éric Araujo1e794f62011-05-05 20:18:16 +02005Usage: findnocoding.py dir1 [dir2...]
Georg Brandl56897312005-08-24 18:32:30 +00006"""
7
Thomas Wouters89f507f2006-12-13 04:49:30 +00008__author__ = "Oleg Broytmann, Georg Brandl"
Georg Brandl56897312005-08-24 18:32:30 +00009
10import sys, os, re, getopt
11
# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # emulate the module with a simple os.walk
    class pysource:
        """Minimal stand-in used when the real ``pysource`` module is missing.

        Only file-name matching on the ``.py`` suffix is performed.
        """

        # The script reads these attributes and passes one of them to
        # walk_python_files(); this fallback ignores the value.
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every ``.py`` file named by or below *paths*.

            Each entry of *paths* may be a file or a directory; directories
            are walked recursively with os.walk.  Entries that are neither
            are skipped.  Extra positional and keyword arguments are
            accepted and ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, not the result of the suffix
                    # test: callers open whatever this generator produces.
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)
    pysource = pysource()

    print("The pysource module is not available; "
          "no sophisticated Python source file search will be done.", file=sys.stderr)
Georg Brandl56897312005-08-24 18:32:30 +000033
34
# PEP 263 form: the declaration must live in a comment, i.e. the line may
# only have whitespace before the '#'.  Anchoring on the comment keeps code
# such as ``open(p, encoding=enc)`` from being mistaken for a declaration.
decl_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)")


def get_declaration(line):
    """Return the encoding name declared on *line*, or '' if there is none.

    *line* may be a str or a bytes object.  Bytes are decoded as latin-1
    (which cannot fail) before the pattern is applied, so the result is
    always a str.
    """
    if isinstance(line, bytes):
        line = line.decode("latin-1")
    match = decl_re.search(line)
    if match:
        return match.group(1)
    return ''
42
def has_correct_encoding(text, codec):
    """Return True if the bytes in *text* decode cleanly with *codec*.

    *text* must be a bytes-like object; *codec* is an encoding name passed
    to ``str(text, codec)``.  Only UnicodeDecodeError is treated as "wrong
    encoding"; any other exception propagates to the caller.
    """
    try:
        str(text, codec)
    except UnicodeDecodeError:
        return False
    return True
50
def needs_declaration(fullpath):
    """Tell whether the file at *fullpath* lacks a required coding line.

    Returns:
        None  if the file cannot be opened,
        False if one of its first two lines carries an encoding declaration
              or the whole file decodes as UTF-8,
        True  otherwise.
    """
    try:
        # Binary mode: the check is about raw bytes, and a text-mode read
        # would decode (or fail to decode) with the locale's encoding.
        infile = open(fullpath, 'rb')
    except IOError:  # Oops, the file was removed - ignore it
        return None

    with infile:
        line1 = infile.readline()
        line2 = infile.readline()

        # get_declaration works on text; latin-1 maps every byte to a code
        # point, so this decode can never raise.
        if (get_declaration(line1.decode('latin-1'))
                or get_declaration(line2.decode('latin-1'))):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non utf-8 characters
        rest = infile.read()

    return not has_correct_encoding(line1 + line2 + rest, "utf-8")
73
74
usage = """Usage: %s [-cd] paths...
    -c: recognize Python source files trying to compile them
    -d: debug output""" % sys.argv[0]


def main(argv=None):
    """Run the command-line tool.

    *argv* is the argument list without the program name; when omitted,
    ``sys.argv[1:]`` is used.  Prints to stdout the path of every file for
    which needs_declaration() returns a true value.  Exits with status 1
    (after printing the usage text to stderr) on a bad option or when no
    paths are given.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(argv, 'cd')
    except getopt.error as msg:
        print(msg, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(1)

    is_python = pysource.looks_like_python
    debug = False

    for opt, _ in opts:
        if opt == '-c':
            is_python = pysource.can_be_compiled
        elif opt == '-d':
            debug = True

    if not args:
        print(usage, file=sys.stderr)
        sys.exit(1)

    for fullpath in pysource.walk_python_files(args, is_python):
        if debug:
            print("Testing for coding: %s" % fullpath)
        result = needs_declaration(fullpath)
        if result:
            print(fullpath)


if __name__ == '__main__':
    main()
106 print(fullpath)