blob: c42fa7cfa3e0cc2093988575d7c328929632c10a [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Georg Brandl56897312005-08-24 18:32:30 +00002
"""List all those Python files that require a coding directive

Usage: findnocoding.py [-cd] dir1 [dir2...]
"""
7
Thomas Wouters89f507f2006-12-13 04:49:30 +00008__author__ = "Oleg Broytmann, Georg Brandl"
Georg Brandl56897312005-08-24 18:32:30 +00009
10import sys, os, re, getopt
11
12# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # Emulate the module with a simple os.walk-based stand-in.
    class pysource:
        """Minimal replacement for the optional ``pysource`` helper module.

        Only file names are inspected: a file counts as Python source when
        its name ends in ``.py``.
        """

        # Placeholders for the predicate attributes callers look up on the
        # module; this fallback ignores whatever predicate is passed to
        # walk_python_files().
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of each ``.py`` file named by or below *paths*.

            paths -- iterable of file and/or directory names.  A file is
                     yielded if its name ends in ``.py``; a directory is
                     walked recursively with os.walk.  Anything that is
                     neither an existing file nor a directory is skipped.

            Extra positional and keyword arguments are accepted and ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, not the result of the suffix
                    # test: callers open whatever is yielded, and a bool
                    # would be treated as a file descriptor by open().
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)

    # Replace the class with an instance so it is used like a module.
    pysource = pysource()

    print("The pysource module is not available; "
          "no sophisticated Python source file search will be done.", file=sys.stderr)
Georg Brandl56897312005-08-24 18:32:30 +000033
34
# Finds "coding:" or "coding=" followed by optional whitespace anywhere in a
# line and captures the encoding name (word characters, '-' and '.').
decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")


def get_declaration(line):
    """Return the encoding name declared in *line*, or '' if there is none."""
    found = decl_re.search(line)
    return found.group(1) if found else ''
42
def has_correct_encoding(text, codec):
    """Return True if *text* decodes without error using *codec*.

    The decoded result is thrown away; only success or failure matters.
    A UnicodeDecodeError is reported as False, any other exception
    propagates to the caller.
    """
    try:
        str(text, codec)
        return True
    except UnicodeDecodeError:
        return False
50
def needs_declaration(fullpath):
    """Tell whether the file at *fullpath* needs a coding declaration.

    Returns:
        None  -- the file could not be opened (e.g. it was removed).
        False -- one of the first two lines contains a coding declaration,
                 or the whole file decodes as UTF-8.
        True  -- there is no declaration and the content is not valid UTF-8.
    """
    try:
        # Binary mode: the UTF-8 check below needs the raw bytes, and reading
        # bytes cannot fail on content the default text encoding rejects.
        infile = open(fullpath, "rb")
    except IOError:  # Oops, the file was removed - ignore it
        return None

    # The context manager closes the file on every exit path.
    with infile:
        line1 = infile.readline()
        line2 = infile.readline()

        # get_declaration() matches against str; latin-1 maps every byte to
        # a character, so this decoding step can never raise.
        if (get_declaration(line1.decode("latin-1"))
                or get_declaration(line2.decode("latin-1"))):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non utf-8 characters
        rest = infile.read()

    return not has_correct_encoding(line1 + line2 + rest, "utf-8")
73
74
# Help text printed to stderr for bad options or when no path is given.
usage = """Usage: %s [-cd] paths...
 -c: recognize Python source files trying to compile them
 -d: debug output""" % sys.argv[0]

# Parse the command line; any option error prints the message and the usage
# text, then exits with status 1.
try:
    opts, args = getopt.getopt(sys.argv[1:], 'cd')
except getopt.error as msg:
    for text in (msg, usage):
        print(text, file=sys.stderr)
    sys.exit(1)

given_flags = [flag for flag, _ in opts]

# Default predicate; -c switches to the compile-based one.
is_python = pysource.looks_like_python
if '-c' in given_flags:
    is_python = pysource.can_be_compiled

# -d turns on a progress line for every file examined.
debug = '-d' in given_flags

# At least one path is required.
if not args:
    print(usage, file=sys.stderr)
    sys.exit(1)

# Report every Python file that lacks a needed coding declaration.
for fullpath in pysource.walk_python_files(args, is_python):
    if debug:
        print("Testing for coding: %s" % fullpath)
    if needs_declaration(fullpath):
        print(fullpath)
Collin Winter6afaeb72007-08-03 17:06:41 +0000104 print(fullpath)