blob: 739d5847734dd0ee87b02de5334ab868bac92bc0 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """Dictionary-like container that keeps every value assigned to a key.

    Assigning to a key never overwrites: the value is appended to the list
    held for that key. Lookups therefore always yield lists.
    """

    def __init__(self, elts=()):
        # Go through __setitem__ so that repeated keys accumulate.
        self.data = {}
        for pairKey, pairValue in elts:
            self[pairKey] = pairValue

    def __getitem__(self, item):
        """Return the list of values stored under `item` (KeyError if absent)."""
        return self.data[item]

    def __setitem__(self, key, value):
        """Append `value` to the list for `key`, creating the list if needed."""
        self.data.setdefault(key, []).append(value)

    def items(self):
        """Return the (key, list-of-values) pairs of the underlying dict."""
        return self.data.items()

    def values(self):
        """Return the value lists of the underlying dict."""
        return self.data.values()

    def keys(self):
        """Return the keys of the underlying dict."""
        return self.data.keys()

    def __len__(self):
        """Return the number of distinct keys (not the number of values)."""
        return len(self.data)

    def get(self, key, default=None):
        """Return the value list for `key`, or `default` when the key is absent."""
        return self.data.get(key, default)
44
45#
46
class AnalysisReport:
    """One loaded report: the run it belongs to plus the report's file table."""

    def __init__(self, run, files):
        # `files` is what a diagnostic's location['file'] indexes into.
        self.run, self.files = run, files
51
class AnalysisDiagnostic:
    """A single diagnostic taken from a report.

    Attributes:
      data       - the diagnostic's dictionary as read from the report.
      report     - the AnalysisReport this diagnostic came from.
      htmlReport - file name of the associated HTML report, relative to the
                   run directory, or None when there is none.
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic's location formatted as 'file:line:col'.

        The file name is looked up in the report's file table and passed
        through the run's getSourceName().
        """
        loc = self.data['location']
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """Return the raw contents of the HTML report for this diagnostic.

        When the diagnostic has no HTML report, a fixed placeholder message
        is returned instead of file contents.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        # Use a context manager so the handle is closed deterministically
        # rather than whenever the file object happens to be collected.
        with open(reportPath, "rb") as reportFile:
            return reportFile.read()
75
class AnalysisRun:
    """The results loaded from one analyzer output directory.

    Holds the directory path, the parsed options, and the reports and
    diagnostics collected for the run (both start out empty).
    """

    def __init__(self, path, opts):
        self.path = path
        self.opts = opts
        self.reports = []
        self.diagnostics = []

    def getSourceName(self, path):
        """Return `path` with the leading opts.root prefix removed, if present."""
        if not path.startswith(self.opts.root):
            return path
        prefixLength = len(self.opts.root)
        return path[prefixLength:]
87
def loadResults(path, opts):
    """Load all analyzer report plists found in `path` into an AnalysisRun.

    Only directory entries whose name starts with 'report' and ends with
    'plist' are read. Reports whose 'files' list is empty are skipped.

    Arguments:
      path - directory containing the report plist files.
      opts - parsed options object, stored on the returned run.

    Returns the populated AnalysisRun.

    Raises AssertionError if a diagnostic lists other than exactly one HTML
    file, or if a report contains keys besides 'files' and 'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        # NOTE: readPlist is the Python 2-era plistlib entry point; it is kept
        # because the rest of this script is Python 2 code.
        data = plistlib.readPlist(os.path.join(path, f))

        # Ignore empty reports.
        if not data['files']:
            continue

        report = AnalysisReport(run, data.pop('files'))

        # Extract the HTML reports, if they exist. This is decided per
        # diagnostic, so an empty diagnostics list or a report in which only
        # some diagnostics carry HTML files is handled without raising.
        diagnostics = []
        for d in data.pop('diagnostics'):
            # Popping also removes the key from the dictionary that is kept as
            # the diagnostic's data.
            htmlFiles = d.pop('HTMLDiagnostics_files', None)
            if htmlFiles is None:
                htmlReport = None
            else:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(htmlFiles) == 1
                htmlReport = htmlFiles[0]
            diagnostics.append(AnalysisDiagnostic(d, report, htmlReport))

        # Every key of the report must have been consumed above.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
125
def compareResults(A, B):
    """
    compareResults - Relate the diagnostics of run A to those of run B.

    Returns a list of triples (a, b, confidence). Each of a and b is either a
    diagnostic from the respective run or None; confidence is 0 for a pair
    whose data compares equal and None when one side is missing.

    Equal pairs come first in the result, followed by the diagnostics found
    only in A, then those found only in B.
    """

    byData = lambda d: d.data
    pendingA = sorted(A.diagnostics, key=byData)
    pendingB = sorted(B.diagnostics, key=byData)

    matched = []
    onlyA = []
    onlyB = []

    # Walk both sorted lists from their largest element downwards. Whichever
    # side holds the strictly larger element cannot have a partner on the
    # other side, so it is set aside as unmatched.
    while pendingA and pendingB:
        a = pendingA[-1]
        b = pendingB[-1]
        if a.data == b.data:
            matched.append((pendingA.pop(), pendingB.pop(), 0))
        elif a.data > b.data:
            onlyA.append(pendingA.pop())
        else:
            onlyB.append(pendingB.pop())

    # Whatever is left once one side runs dry is unmatched as well.
    onlyA += pendingA
    onlyB += pendingB

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    return (matched
            + [(a, None, None) for a in onlyA]
            + [(None, b, None) for b in onlyB])
172
173def main():
174 from optparse import OptionParser
175 parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
176 parser.add_option("", "--root", dest="root",
177 help="Prefix to ignore on source files",
178 action="store", type=str, default="")
179 parser.add_option("", "--verbose-log", dest="verboseLog",
180 help="Write additional information to LOG [default=None]",
181 action="store", type=str, default=None,
182 metavar="LOG")
183 (opts, args) = parser.parse_args()
184
185 if len(args) != 2:
186 parser.error("invalid number of arguments")
187
188 dirA,dirB = args
189
190 # Load the run results.
191 resultsA = loadResults(dirA, opts)
192 resultsB = loadResults(dirB, opts)
193
194 # Open the verbose log, if given.
195 if opts.verboseLog:
196 auxLog = open(opts.verboseLog, "wb")
197 else:
198 auxLog = None
199
200 diff = compareResults(resultsA, resultsB)
201 for res in diff:
202 a,b,confidence = res
203 if a is None:
204 print "ADDED: %r" % b.getReadableName()
205 if auxLog:
206 print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
207 b.getReportData()))
208 elif b is None:
209 print "REMOVED: %r" % a.getReadableName()
210 if auxLog:
211 print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
212 a.getReportData()))
213 elif confidence:
214 print "CHANGED: %r to %r" % (a.getReadableName(),
215 b.getReadableName())
216 if auxLog:
217 print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
218 % (a.getReadableName(),
219 b.getReadableName(),
220 a.getReportData(),
221 b.getReportData()))
222 else:
223 pass
224
225 print "TOTAL REPORTS: %r" % len(resultsB.diagnostics)
226 if auxLog:
227 print >>auxLog, "('TOTAL', %r)" % len(resultsB.diagnostics)
228
229if __name__ == '__main__':
230 main()