blob: 3ca9b2bbe7eb1f25cba0473df2c7d1b01cc1c6ac [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)
26
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
Anna Zaks81765572012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000032
# Information about analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
#        determining the source file name
class SingleRunInfo:
    """Describe one analyzer run that is about to be loaded.

    path       -- the analysis output directory.
    root       -- the root directory prefix that is disregarded when
                  determining the source file name.
    verboseLog -- optional verbose-log setting; stored exactly as given.
    """

    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root, self.verboseLog = path, root, verboseLog
42
class AnalysisDiagnostic:
    """A single diagnostic entry read from an analyzer plist report.

    data       -- the raw diagnostic dictionary; it must contain 'location'
                  (with 'file', 'line' and 'col'), and the accessors below
                  read 'category', 'description' and, when present,
                  'issue_context' and 'issue_hash'.
    report     -- the owning report; its 'files' list and 'run' are used.
    htmlReport -- name of the matching HTML report file, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        prefix = self._report.run.root
        name = self._report.files[self._loc['file']]
        if not name.startswith(prefix):
            return name
        return name[len(prefix):]

    def getLine(self):
        """Return the 'line' entry of the diagnostic location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is the file name followed by '+', then the issue context
        and another '+' when a context is recorded, then the issue hash
        when one is recorded.
        """
        identifier = self.getFileName() + "+"
        raw = self._data
        if 'issue_context' in raw:
            identifier = identifier + raw['issue_context'] + "+"
        if 'issue_hash' in raw:
            identifier = identifier + str(raw['issue_hash'])
        return identifier

    def getReport(self):
        """Return the HTML report path, or a single space when there is none."""
        html = self._htmlReport
        if html is not None:
            return os.path.join(self._report.run.path, html)
        return " "

    def getReadableName(self):
        """Return a one-line 'file:line:col, category: description' summary."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the underlying diagnostic dictionary itself (not a copy)."""
        return self._data
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000091
class multidict:
    """Mapping in which every key holds the list of values assigned to it.

    elts -- optional iterable of (key, value) pairs used to seed the mapping.
    """

    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Assignment never overwrites: the value joins the key's list,
        # which is created on first use.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000115
class CmpOptions:
    """Options bundle for comparing two analyzer runs.

    verboseLog -- optional verbose-log setting; stored exactly as given.
    rootA      -- root prefix associated with run A.
    rootB      -- root prefix associated with run B.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA, self.rootB = rootA, rootB
        self.verboseLog = verboseLog
121
class AnalysisReport:
    """The contents of one plist report belonging to an analysis run.

    run        -- the run this report belongs to.
    files      -- the report's file list.
    clang_vers -- the clang version recorded for the report; stored as
                  'clang_version'.
    """

    def __init__(self, run, files, clang_vers):
        self.run = run
        self.clang_version = clang_vers
        self.files = files
        # Starts empty; each instance gets its own list.
        self.diagnostics = list()
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000128
class AnalysisRun:
    """All reports and diagnostics collected for one analyzer run.

    info -- object describing the run; its 'path' and 'root' attributes are
            copied onto this instance and the object itself is kept as
            'info'.
    """

    def __init__(self, info):
        self.path, self.root = info.path, info.root
        self.info = info
        self.reports = list()
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = list()
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000137
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000138
Anna Zaks81765572012-10-15 22:48:21 +0000139# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load one run's results through the older (path, opts, root) interface.

    Packs 'path', 'root' and 'opts.verboseLog' into a SingleRunInfo and
    forwards it, together with 'deleteEmpty', to loadResultsFromSingleRun,
    whose result is returned unchanged.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
143
# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Load every plist report found in one analysis output directory.

    info        -- object describing the run; 'info.path' names the
                   directory that is scanned, and the object is handed to
                   AnalysisRun.
    deleteEmpty -- when true, plist files whose 'files' entry is empty are
                   removed from disk; such files are skipped either way.

    Returns an AnalysisRun whose 'reports' holds one AnalysisReport per
    non-empty plist and whose 'diagnostics' accumulates the diagnostics of
    all of those reports.

    Raises AssertionError if a diagnostic lists other than exactly one HTML
    file, or if a plist contains keys besides 'files', 'diagnostics' and
    'clang_version'.
    """
    path = info.path
    run = AnalysisRun(info)

    for f in os.listdir(path):
        if not f.endswith('plist'):
            continue

        p = os.path.join(path, f)
        # plistlib.readPlist no longer exists on newer Python 3 releases.
        # Keep using it wherever it is available so behaviour there is
        # unchanged, and fall back to plistlib.load otherwise.
        if hasattr(plistlib, 'readPlist'):
            data = plistlib.readPlist(p)
        else:
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole report; a report that lists files but has
        # no diagnostics simply has no HTML files instead of raising
        # IndexError.
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        clang_version = ''
        if 'clang_version' in data:
            clang_version = data.pop('clang_version')

        report = AnalysisReport(run, data.pop('files'), clang_version)
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'),
                                       htmlFiles)]

        # Every recognised key has been popped by now; anything left over
        # is a part of the plist this loader does not understand.
        assert not data

        report.diagnostics.extend(diagnostics)
        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
191
def cmpAnalysisDiagnostic(d):
    """Sort-key function: return the issue identifier of diagnostic 'd'."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaks19b17cb2012-06-08 01:50:49 +0000194
def compareResults(A, B):
    """
    compareResults - Relate the diagnostics of run A to those of run B.

    Returns a list of triples (a, b, confidence). Matched pairs come first
    with confidence 0 (their issue identifiers are equal); they are followed
    by (a, None, None) for diagnostics found only in A and then by
    (None, b, None) for diagnostics found only in B.
    """

    # Work on sorted copies so the runs' own lists are left untouched.
    pendingA = sorted(A.diagnostics, key = cmpAnalysisDiagnostic)
    pendingB = sorted(B.diagnostics, key = cmpAnalysisDiagnostic)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements by walking both sorted lists from
    # their largest identifiers downwards.
    while pendingA and pendingB:
        idA = pendingA[-1].getIssueIdentifier()
        idB = pendingB[-1].getIssueIdentifier()
        if idA == idB:
            matched.append((pendingA.pop(), pendingB.pop(), 0))
        elif idA > idB:
            # Nothing left in B can equal A's largest identifier.
            onlyA.append(pendingA.pop())
        else:
            # Nothing left in A can equal B's largest identifier.
            onlyB.append(pendingB.pop())

    # Whatever remains on either side has no counterpart.
    onlyA.extend(pendingA)
    onlyB.extend(pendingB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend([(a, None, None) for a in onlyA])
    res.extend([(None, b, None) for b in onlyB])
    return res
241
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the analyzer results in two directories.

    dirA, dirB  -- output directories of run A and run B.
    opts        -- options object providing 'rootA', 'rootB' and
                   'verboseLog'; when 'verboseLog' is set it names a file
                   that receives one tuple-formatted line per difference
                   plus the totals.
    deleteEmpty -- forwarded to loadResults for both directories.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. It only ever receives text, so it is
    # opened in text mode.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    # The try/finally guarantees the log is flushed and closed even when
    # reporting fails part-way through.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReport()))
            elif confidence:
                # Only reached for a matched pair with non-zero confidence.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
            # Otherwise the pair matched exactly; nothing to report.

        # The total counts the diagnostics of run B only.
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
290
def main():
    """Command-line entry point: compare two analyzer output directories.

    Expects exactly two positional arguments (directory A and directory B)
    and accepts --rootA, --rootB and --verbose-log. The parsed options are
    passed straight to dumpScanBuildResultsDiff.
    """
    from optparse import OptionParser

    parser = OptionParser(usage="usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "--rootA", action="store", type=str, dest="rootA", default="",
        help="Prefix to ignore on source files for directory A")
    parser.add_option(
        "--rootB", action="store", type=str, dest="rootB", default="",
        help="Prefix to ignore on source files for directory B")
    parser.add_option(
        "--verbose-log", action="store", type=str, dest="verboseLog",
        default=None, metavar="LOG",
        help="Write additional information to LOG [default=None]")

    opts, args = parser.parse_args()

    # parser.error() prints the usage message and exits.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args
    dumpScanBuildResultsDiff(dirA, dirB, opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000312
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()