blob: 46152b964417716af21bc62b968beda1bcdebb25 [file] [log] [blame]
Elliott Hughes387d4b72012-08-09 15:17:46 -07001#!/usr/bin/python
2# Run with directory arguments from any directory, with no special setup required.
3
4import ftplib
5import hashlib
6import os
7import re
8import shutil
9import string
10import subprocess
11import sys
12import tarfile
13import tempfile
14
def IsUninteresting(path):
    """Return True if `path` names a file that should not be scanned.

    The comparison is case-insensitive. A path is uninteresting when it
    ends with one of the listed extensions, one of the listed documentation
    file names (as a final path component), or one of the listed zoneinfo
    paths.
    """
    skipped_extensions = (".mk", ".py", ".pyc", ".txt", ".3")
    skipped_documents = ("/notice", "/readme", "/caveats")
    skipped_zoneinfo = (
        "/zoneinfo.dat",
        "/zoneinfo.idx",
        "/zoneinfo.version",
        "/zoneinfo/generate",
    )
    # str.endswith accepts a tuple and reports whether any suffix matches.
    all_suffixes = skipped_extensions + skipped_documents + skipped_zoneinfo
    return path.lower().endswith(all_suffixes)
24
def IsAutoGenerated(content):
    """Return True if `content` contains a known auto-generation marker.

    `content` is the full text of a file; the match is a case-sensitive
    substring search for any of the marker phrases below.
    """
    generation_markers = (
        "generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in generation_markers)
31
# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
      lines: the lines of one file, without line terminators.
      i: index of the line at which a copyright notice was spotted.

    Returns:
      The index at which scanning stopped: the first line at or after `i`
      that terminates the header, or len(lines) if none does.

    For "#" comments the header starts at lines[i]; otherwise it starts on
    the line after the nearest preceding line containing "/*" (or at the top
    of the file if there is none). The terminating line itself is never part
    of the notice. If no text is left after clean-up (for example a
    one-line "/* Copyright ... */" comment, where the spotted line is also
    the terminator), nothing is recorded.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Version-control / provenance tags that mark the end of a header.
    terminators = (
        "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
        "\tcitrus Id: ",
        "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
        "$FreeBSD$", "$Citrus$",
    )

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        if is_hash_comment and len(current) == 0:
            break
        if any(tag in current for tag in terminators):
            break
        i += 1

    # Trim trailing cruft (never past the start of the header).
    cruft = (" *", " * ====================================================")
    end = i
    while end > start and lines[end - 1] in cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = line.replace(" * ", "")
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The indices are bounded so that
    # an empty or all-blank extraction cannot raise IndexError.
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1

    if first < last:
        copyrights.add("\n".join(clean_lines[first:last]))

    return i
97
# Directories to scan come from the command line; default to the current one.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    sys.stderr.write('Searching for source files in "%s"...\n' % arg)

    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours changes made to this list in place, so prune
        # and sort it with mutating calls rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if IsUninteresting(path):
                continue

            # Read the raw bytes once (closing the file promptly), then try
            # UTF-8 first and fall back to ISO-8859-1, which cannot fail.
            with open(path, 'rb') as source_file:
                raw = source_file.read()
            try:
                content = raw.decode('utf-8')
            except UnicodeDecodeError:
                # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
                sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
                content = raw.decode('iso-8859-1')

            lines = content.split("\n")

            # Very short files are skipped without a warning.
            if len(lines) <= 4:
                continue

            # Auto-generated files are skipped without a warning.
            if IsAutoGenerated(content):
                continue

            if "Copyright" not in content:
                # Files declaring themselves public domain are skipped
                # silently; anything else without a notice gets a warning.
                if "public domain" in content.lower():
                    continue
                sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                continue

            # Harvest every notice in the file; ExtractCopyrightAt returns
            # the index where it stopped, and scanning resumes just after.
            i = 0
            while i < len(lines):
                if "Copyright" in lines[i]:
                    i = ExtractCopyrightAt(lines, i)
                i += 1

# Emit the notices as UTF-8 bytes. Python 3 exposes a byte stream as
# sys.stdout.buffer; Python 2's sys.stdout accepts byte strings directly.
# Each notice is followed by a blank line, a rule, and another blank line.
output = getattr(sys.stdout, 'buffer', sys.stdout)
rule = b'-------------------------------------------------------------------'
for notice in sorted(copyrights):
    output.write(notice.encode('utf-8') + b'\n\n' + rule + b'\n\n')

sys.exit(0)