Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 2 | |
| 3 | """List all those Python files that require a coding directive |
| 4 | |
Éric Araujo | 1e794f6 | 2011-05-05 20:18:16 +0200 | [diff] [blame] | 5 | Usage: findnocoding.py dir1 [dir2...] |
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 6 | """ |
| 7 | |
Thomas Wouters | 89f507f | 2006-12-13 04:49:30 +0000 | [diff] [blame] | 8 | __author__ = "Oleg Broytmann, Georg Brandl" |
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 9 | |
| 10 | import sys, os, re, getopt |
| 11 | |
| 12 | # our pysource module finds Python source files |
try:
    import pysource
except ImportError:
    # Emulate the module with a simple os.walk-based search that recognizes
    # Python files purely by their ".py" extension.
    class _FallbackPysource:
        """Minimal stand-in for the optional ``pysource`` helper module."""

        # Placeholders for the predicates of the real module.  The script's
        # option handling reads looks_like_python / can_be_compiled and hands
        # one of them to walk_python_files(), which ignores it here.
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every ``.py`` file found under *paths*.

            Each entry of *paths* may be a file or a directory.  A file is
            yielded only if its name ends in ".py"; a directory is walked
            recursively.  Entries that are neither are skipped.  Extra
            positional and keyword arguments are accepted and ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, not the boolean result of the
                    # extension test: callers open() whatever is yielded.
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, _dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)

    # Expose an instance under the module's name so that attribute access
    # (pysource.walk_python_files, ...) works as with the real module.
    pysource = _FallbackPysource()

    print("The pysource module is not available; "
          "no sophisticated Python source file search will be done.", file=sys.stderr)
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 33 | |
| 34 | |
# Encoding declaration as specified by PEP 263: a comment containing
# "coding:" or "coding=" followed by the encoding name.  The ".*?" is
# non-greedy so that the FIRST such occurrence on the line is the one
# captured, even if a later word (e.g. "encoding=...") would also match.
decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
# A line that is empty, whitespace-only, or whose first non-blank character
# starts a comment.  Only after such a first line may the declaration sit on
# the second line.
blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')

def get_declaration(line):
    """Return the encoding name declared on *line*, or b'' if there is none.

    *line* is a bytes object holding one source line.  The result is the
    raw bytes of the encoding name exactly as written in the declaration.
    """
    match = decl_re.match(line)
    return match.group(1) if match else b''
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 43 | |
def has_correct_encoding(text, codec):
    """Tell whether the bytes in *text* can be decoded using *codec*.

    Returns True when decoding succeeds and False when it raises
    UnicodeDecodeError; the decoded string itself is discarded.
    """
    try:
        str(text, codec)
    except UnicodeDecodeError:
        decodable = False
    else:
        decodable = True
    return decodable
| 51 | |
def needs_declaration(fullpath):
    """Decide whether the file at *fullpath* lacks a needed coding line.

    Returns:
        None  -- the file could not be opened (e.g. it was removed).
        False -- the file declares an encoding on its first line, or on
                 its second line after a blank/comment first line; or it
                 declares nothing but is valid UTF-8 throughout.
        True  -- no declaration was found and the content is not UTF-8.
    """
    try:
        stream = open(fullpath, 'rb')
    except IOError:
        # The file disappeared (or is unreadable) - silently skip it.
        return None

    with stream:
        first = stream.readline()
        second = stream.readline()

        # A declaration on line two only counts when line one is blank or
        # a comment.
        declared = get_declaration(first)
        if not declared and blank_re.match(first):
            declared = get_declaration(second)
        if declared:
            # An explicit declaration is present, so trust it.
            return False

        # No declaration: the remainder of the file has to be inspected.
        remainder = stream.read()

    # Without a declaration the whole file must be valid UTF-8.
    return not has_correct_encoding(first + second + remainder, "utf-8")
| 74 | |
| 75 | |
# Help text for the command line; the script prints it to stderr when option
# parsing fails or when no paths are given.  "%s" is filled with sys.argv[0].
usage = """Usage: %s [-cd] paths...
    -c: recognize Python source files trying to compile them
    -d: debug output""" % sys.argv[0]
| 79 | |
if __name__ == '__main__':

    # Parse the command line; an option getopt rejects is a usage error.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cd')
    except getopt.error as msg:
        print(msg, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(1)

    given_flags = {flag for flag, _value in opts}

    # -c switches the file-recognition predicate handed to pysource;
    # -d turns on a progress line for every file examined.
    is_python = pysource.looks_like_python
    if '-c' in given_flags:
        is_python = pysource.can_be_compiled
    debug = '-d' in given_flags

    # At least one path argument is required.
    if not args:
        print(usage, file=sys.stderr)
        sys.exit(1)

    # Print the path of every file that needs a coding declaration.
    for fullpath in pysource.walk_python_files(args, is_python):
        if debug:
            print("Testing for coding: %s" % fullpath)
        if needs_declaration(fullpath):
            print(fullpath)