Patch [ 784089 ] A program to scan python files and list those require coding
diff --git a/Misc/NEWS b/Misc/NEWS
index 765565c..7e21b7a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -502,6 +502,11 @@
 Tools/Demos
 -----------
 
+- Added two new files to Tools/scripts: pysource.py, which recursively
+  finds Python source files, and findnocoding.py, which finds Python
+  source files that need an encoding declaration.
+  Patch #784089, credits to Oleg Broytmann.
+
 - Bug #1072853: pindent.py used an uninitialized variable.
 
 - Patch #1177597: Correct Complex.__init__.
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py
new file mode 100755
index 0000000..707bf23
--- /dev/null
+++ b/Tools/scripts/findnocoding.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""List all those Python files that require a coding directive
+
+Usage: findnocoding.py [-cd] dir1 [dir2...]
+"""
+
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+import sys, os, re, getopt
+
+# our pysource module finds Python source files
+try:
+    import pysource
+except ImportError:
+    # emulate the module with a simple os.walk (any is_python test is ignored)
+    class pysource:
+        has_python_ext = looks_like_python = can_be_compiled = None
+        def walk_python_files(self, paths, *args, **kwargs):
+            for path in paths:
+                if os.path.isfile(path):
+                    if path.endswith(".py"):
+                        yield path  # the name itself, not the endswith() result
+                elif os.path.isdir(path):
+                    for root, dirs, files in os.walk(path):
+                        for filename in files:
+                            if filename.endswith(".py"):
+                                yield os.path.join(root, filename)
+    pysource = pysource()
+
+    print >>sys.stderr, ("The pysource module is not available; "
+                         "no sophisticated Python source file search will be done.")
+
+
+decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")
+
+def get_declaration(line):
+    found = decl_re.search(line)   # first "coding[=:] name" in the line
+    if found is None:
+        return ''                  # no declaration on this line
+    return found.group(1)          # the declared codec name
+
+def has_correct_encoding(text, codec):
+    """Return True if text can be decoded with codec, False otherwise."""
+    try:
+        unicode(text, codec)
+    except UnicodeDecodeError:
+        return False
+    return True
+
+def needs_declaration(fullpath):
+    """Tell whether the file has non-ASCII content but no coding declaration.
+
+    Returns None if the file cannot be opened, otherwise True or False.
+    """
+    try:
+        infile = open(fullpath, 'rU')
+    except IOError: # Oops, the file was removed - ignore it
+        return None
+
+    try:
+        line1 = infile.readline()
+        line2 = infile.readline()
+        if get_declaration(line1) or get_declaration(line2):
+            # the file does have an encoding declaration, so trust it
+            return False
+        # check the whole file for non-ASCII characters
+        rest = infile.read()
+    finally:
+        infile.close()   # closed on every path, including read errors
+
+    return not has_correct_encoding(line1+line2+rest, "ascii")
+
+
+usage = """Usage: %s [-cd] paths...
+    -c: recognize Python source files trying to compile them
+    -d: debug output""" % sys.argv[0]
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'cd')
+except getopt.error, msg:  # option parsing failed: show why, then the usage
+    print >>sys.stderr, msg
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+is_python = pysource.looks_like_python  # default file test; -c replaces it
+debug = False
+
+for o, a in opts:
+    if o == '-c':
+        is_python = pysource.can_be_compiled
+    elif o == '-d':
+        debug = True  # this script's flag only; pysource.debug is not set here
+
+if not args:  # at least one path is required
+    print >>sys.stderr, usage
+    sys.exit(1)
+
+for fullpath in pysource.walk_python_files(args, is_python):
+    if debug:
+        print "Testing for coding: %s" % fullpath
+    result = needs_declaration(fullpath)
+    if result:  # None (file could not be opened) and False are both skipped
+        print fullpath
+
+
diff --git a/Tools/scripts/pysource.py b/Tools/scripts/pysource.py
new file mode 100644
index 0000000..3b01bfc
--- /dev/null
+++ b/Tools/scripts/pysource.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+
+"""\
+List python source files.
+
+There are three functions to check whether a file is a Python source, listed
+here with increasing complexity:
+
+- has_python_ext() checks whether a file name ends in '.py[w]'.
+- looks_like_python() checks whether the file is not binary and either has
+  the '.py[w]' extension or the first line contains the word 'python'.
+- can_be_compiled() checks whether the file can be compiled by compile().
+
+The file also must be of appropriate size - not bigger than a megabyte.
+
+walk_python_files() recursively lists all Python files under the given directories.
+"""
+__author__ = "Oleg Broytmann, Reinhold Birkenfeld"
+
+__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
+
+
+import sys, os, re
+
+binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')  # control chars except \x09-\x0D
+
+debug = False  # print_debug() prints only while this is true
+
+def print_debug(msg):
+    if debug: print msg
+
+
+def _open(fullpath):
+    """Open fullpath ('rU'); return None if it is unreadable or over 1 MB."""
+    try:
+        nbytes = os.stat(fullpath).st_size
+    except OSError, err: # Permission denied - ignore the file
+        print_debug("%s: permission denied: %s" % (fullpath, err))
+        return None
+    if nbytes > 1024*1024: # too big
+        print_debug("%s: the file is too big: %d bytes" % (fullpath, nbytes))
+        return None
+
+    try:
+        return open(fullpath, 'rU')
+    except IOError, err: # Access denied, or a special file - ignore it
+        print_debug("%s: access denied: %s" % (fullpath, err))
+        return None
+
+def has_python_ext(fullpath):  # name-only test: '.py' or '.pyw' suffix
+    return fullpath.endswith(".py") or fullpath.endswith(".pyw")
+
+def looks_like_python(fullpath):
+    """Cheaply guess whether fullpath is a Python source file.
+
+    Returns False if the file cannot be opened by _open() or its first line
+    contains binary control characters; otherwise True if the name has a
+    Python extension or the first line mentions 'python'.
+    """
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+    try:
+        line = infile.readline()
+    finally:
+        infile.close()
+
+    if binary_re.search(line):
+        print_debug("%s: appears to be binary" % fullpath)
+        return False
+    # "python" in the first line: disguised Python script (e.g. CGI)
+    return has_python_ext(fullpath) or "python" in line
+
+def can_be_compiled(fullpath):
+    """Return True if the file's contents compile as Python source."""
+    infile = _open(fullpath)
+    if infile is None:
+        return False
+    code = infile.read()
+    infile.close()
+
+    try:
+        # before Python 2.7, 'exec' input had to end with a newline
+        compile(code + "\n", fullpath, "exec")
+    except Exception, err:
+        print_debug("%s: cannot compile: %s" % (fullpath, err))
+        return False
+    return True
+
+
+def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
+    """\
+    Recursively yield all Python source files below the given paths.
+
+    paths: a list of files and/or directories to be checked.
+    is_python: a function that takes a file name and checks whether it is a
+               Python source file
+    exclude_dirs: a list of directory base names that should be excluded in
+                  the search
+    """
+    if exclude_dirs is None:
+        exclude_dirs = []
+
+    for path in paths:
+        print_debug("testing: %s" % path)
+        if os.path.isfile(path):
+            if is_python(path):
+                yield path
+            continue
+        if not os.path.isdir(path):
+            print_debug("    unknown type")
+            continue
+        print_debug("    it is a directory")
+        for dirpath, dirnames, filenames in os.walk(path):
+            # prune excluded directories in place so os.walk skips them
+            dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
+            for filename in filenames:
+                fullpath = os.path.join(dirpath, filename)
+                print_debug("testing: %s" % fullpath)
+                if is_python(fullpath):
+                    yield fullpath
+
+
+if __name__ == "__main__":
+    # Two simple examples/tests: walk '.' with the default test, then compile()
+    for fullpath in walk_python_files(['.']):
+        print fullpath
+    print "----------"
+    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
+        print fullpath