blob: 60133453485f17536444e2074243f36b9b240473 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks7acc4072012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
19 resultsA = loadResults(dirA, opts, deleteEmpty)
20 resultsB = loadResults(dirB, opts, deleteEmpty)
21
22 # Generate a relation from diagnostics in run A to diagnostics in run B
23 # to obtain a list of triples (a, b, confidence).
24 diff = compareResults(resultsA, resultsB)
25
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000026"""
27
28import os
29import plistlib
30
31#
class AnalysisDiagnostic:
    """One diagnostic entry from an analyzer report.

    Holds the raw diagnostic dictionary, the report object it belongs to,
    and the name of the matching HTML report (or None when there is none).
    """

    def __init__(self, data, report, htmlReport):
        self._report = report
        self._htmlReport = htmlReport
        self._data = data
        # The location dictionary is consulted by most accessors, so keep a
        # direct reference to it.
        self._loc = data['location']

    def getFileName(self):
        """Return the source name of the file this diagnostic points at.

        The location stores an index into the report's file list; the run
        then maps that recorded path to a source name.
        """
        owner = self._report
        recordedPath = owner.files[self._loc['file']]
        return owner.run.getSourceName(recordedPath)

    def getLine(self):
        """Return the 'line' entry of the diagnostic's location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic's location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the identifier used to match diagnostics between runs.

        It is the 'issue_context' entry followed by the stringified
        'issue_hash' entry; either part is left out when its key is absent,
        so the result may be the empty string.
        """
        pieces = []
        if 'issue_context' in self._data:
            pieces.append(self._data['issue_context'])
        if 'issue_hash' in self._data:
            pieces.append(str(self._data['issue_hash']))
        return ''.join(pieces)

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        html = self._htmlReport
        if html is not None:
            return os.path.join(self._report.run.path, html)
        return " "

    def getReadableName(self):
        """Return a one-line 'file:line:col, category: description' summary."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000071
class multidict:
    """Dictionary wrapper that accumulates every value assigned to a key.

    Assigning to a key appends to that key's list instead of replacing it,
    so lookups always yield a list of values in assignment order.
    """

    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            # Go through __setitem__ so the append semantics apply.
            self[key] = value

    def __getitem__(self, item):
        """Return the list of values stored for `item` (KeyError if absent)."""
        return self.data[item]

    def __setitem__(self, key, value):
        """Append `value` to the list for `key`, creating the list if needed."""
        self.data.setdefault(key, []).append(value)

    def items(self):
        """Return the (key, list-of-values) pairs of the underlying dict."""
        return self.data.items()

    def values(self):
        """Return the value lists of the underlying dict."""
        return self.data.values()

    def keys(self):
        """Return the keys of the underlying dict."""
        return self.data.keys()

    def __len__(self):
        """Return the number of distinct keys."""
        return len(self.data)

    def get(self, key, default=None):
        """Return the value list for `key`, or `default` when it is absent."""
        return self.data.get(key, default)
95
96#
97
class CmpOptions:
    """Options for a comparison.

    verboseLog -- path of an extra log file to write, or None for no log.
    root       -- prefix to ignore on source file paths.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
102
class AnalysisReport:
    """A single loaded report: the run it belongs to and its file list."""

    def __init__(self, run, files):
        # `files` is indexed by the 'file' entry of a diagnostic's location.
        self.run, self.files = run, files
107
class AnalysisRun:
    """All reports and diagnostics loaded from one results directory."""

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """Return `path` with the configured root prefix stripped.

        Paths that do not start with `opts.root` are returned unchanged.
        """
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
119
def loadResults(path, opts, deleteEmpty=True):
    """Load every analyzer report found directly inside `path`.

    Only directory entries whose name starts with 'report' and ends with
    'plist' are read.

    path        -- directory containing the plist reports.
    opts        -- options object stored on the resulting run.
    deleteEmpty -- when true, report files that list no source files are
                   removed from disk; such reports are skipped either way.

    Returns an AnalysisRun whose `reports` and `diagnostics` lists have been
    filled in.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. Whether they exist is
        # decided by looking at the first diagnostic, so guard against a
        # report that lists files but carries no diagnostics at all.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every top-level key of the plist must have been consumed above.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
159
def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaks19b17cb2012-06-08 01:50:49 +0000162
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    Diagnostics are matched by their issue identifier
    (getIssueIdentifier()).

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None). Matched pairs come first,
    followed by the diagnostics found only in A, then those found only in B.
    """

    def sortedByIdentifier(diagnostics):
        # Compute each identifier once and keep it next to its diagnostic,
        # so the sort order and the merge comparison below are guaranteed to
        # use exactly the same key.
        keyed = [(d.getIssueIdentifier(), d) for d in diagnostics]
        keyed.sort(key=lambda pair: pair[0])
        return keyed

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sortedByIdentifier(A.diagnostics)
    eltsB = sortedByIdentifier(B.diagnostics)
    while eltsA and eltsB:
        # Both lists are sorted ascending, so the last entries carry the
        # largest remaining identifiers.
        idA, a = eltsA[-1]
        idB, b = eltsB[-1]
        if idA == idB:
            res.append((a, b, 0))
            eltsA.pop()
            eltsB.pop()
        elif idA > idB:
            # Nothing left in B can match `a`: every remaining identifier in
            # B is smaller.
            neqA.append(a)
            eltsA.pop()
        else:
            neqB.append(b)
            eltsB.pop()
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
209
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between two analyzer result directories.

    dirA, dirB  -- directories holding the reports of the two runs.
    opts        -- options object; `opts.verboseLog`, when set, names a file
                   that receives one tuple-formatted line per difference
                   (including the report paths) plus the totals.
    deleteEmpty -- forwarded to loadResults for both directories.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # print(...) with a single argument and auxLog.write(... + "\n") emit
    # exactly what the Python 2 print statement forms would.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()) + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()) + "\n")
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()) + "\n")
            else:
                # Exact match (confidence 0): nothing to report.
                pass

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % TotalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        # Close the log even if comparing or printing fails, so the handle is
        # not leaked and buffered lines are flushed.
        if auxLog:
            auxLog.close()

    return foundDiffs
258
def main():
    """Command-line entry point: compare the two result directories given.

    Expects exactly two positional arguments (dir A and dir B); any other
    count is reported through the option parser's error handling.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", dest="root",
                      help="Prefix to ignore on source files",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    # The comparison routine in this file is dumpScanBuildResultsDiff; the
    # previously called name cmpScanBuildResults is not defined here.
    dumpScanBuildResultsDiff(dirA, dirB, opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000277
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()