Benjamin Peterson | 90f5ba5 | 2010-03-11 22:53:45 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 2 | |
| 3 | """List all those Python files that require a coding directive |
| 4 | |
Éric Araujo | 1e794f6 | 2011-05-05 20:18:16 +0200 | [diff] [blame] | 5 | Usage: findnocoding.py dir1 [dir2...] |
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 6 | """ |
| 7 | |
Thomas Wouters | 89f507f | 2006-12-13 04:49:30 +0000 | [diff] [blame] | 8 | __author__ = "Oleg Broytmann, Georg Brandl" |
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 9 | |
| 10 | import sys, os, re, getopt |
| 11 | |
| 12 | # our pysource module finds Python source files |
try:
    import pysource
except ImportError:
    # emulate the module with a simple os.walk
    class pysource:
        """Minimal stand-in for the pysource module.

        Only walk_python_files() is functional; the predicate attributes
        exist so that attribute lookups on the stand-in succeed, but they
        are all None and are ignored by the walker.
        """

        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield the path of every ``.py`` file found under *paths*.

            Each entry of *paths* may be a file or a directory.  A file is
            yielded when its name ends in ".py"; a directory is walked
            recursively with os.walk.  Extra arguments are accepted and
            ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # Yield the path itself, not the result of the
                    # suffix test.
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)
    pysource = pysource()

    print("The pysource module is not available; "
          "no sophisticated Python source file search will be done.", file=sys.stderr)
Georg Brandl | 5689731 | 2005-08-24 18:32:30 +0000 | [diff] [blame] | 33 | |
| 34 | |
# A coding declaration only counts when it appears inside a comment
# (PEP 263); matching "coding:" anywhere would also hit string literals
# and ordinary code.
decl_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)")

def get_declaration(line):
    """Return the encoding name declared on *line*, or '' if there is none.

    *line* is a single line of source text (str).
    """
    match = decl_re.search(line)
    if match:
        return match.group(1)
    return ''
| 42 | |
def has_correct_encoding(text, codec):
    """Report whether *text* can be decoded using *codec*.

    The decoded result is discarded; only success or failure matters.
    Returns False when decoding raises UnicodeDecodeError, True otherwise.
    """
    decodable = True
    try:
        str(text, codec)
    except UnicodeDecodeError:
        decodable = False
    return decodable
| 50 | |
def needs_declaration(fullpath):
    """Tell whether the file at *fullpath* needs an encoding declaration.

    Returns None if the file cannot be opened, False if one of its first
    two lines carries a coding declaration or if the whole file decodes as
    UTF-8, and True otherwise.
    """
    try:
        # Read raw bytes: text mode would decode with the locale's encoding
        # (possibly failing before any check is made) and would hand str
        # objects to has_correct_encoding(), which needs bytes to decode.
        infile = open(fullpath, 'rb')
    except IOError: # Oops, the file was removed - ignore it
        return None

    # The with block closes the file even if a read fails.
    with infile:
        line1 = infile.readline()
        line2 = infile.readline()

        # latin-1 maps every byte to a character, so this decoding cannot
        # fail; it is only used to search for the declaration.
        if (get_declaration(line1.decode('latin-1'))
                or get_declaration(line2.decode('latin-1'))):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non utf-8 characters
        rest = infile.read()

    return not has_correct_encoding(line1 + line2 + rest, "utf-8")
| 73 | |
| 74 | |
# Help text shown on bad options or when no path is given.
usage = """Usage: %s [-cd] paths...
-c: recognize Python source files trying to compile them
-d: debug output""" % sys.argv[0]

# Parse the command line; on an unknown option report it and bail out.
try:
    opts, args = getopt.getopt(sys.argv[1:], 'cd')
except getopt.error as msg:
    print(msg, usage, sep="\n", file=sys.stderr)
    sys.exit(1)

# Translate the option flags into settings.
given_flags = {flag for flag, _ in opts}
is_python = pysource.looks_like_python
if '-c' in given_flags:
    is_python = pysource.can_be_compiled
debug = '-d' in given_flags

# At least one path is required.
if not args:
    print(usage, file=sys.stderr)
    sys.exit(1)

# Print every Python file that lacks a needed coding declaration.
for fullpath in pysource.walk_python_files(args, is_python):
    if debug:
        print("Testing for coding: %s" % fullpath)
    if needs_declaration(fullpath):
        print(fullpath)