#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""
28
29import os
30import plistlib
Anna Zaks81765572012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000032
# Describes a single analysis run:
#   path       - the analysis output directory
#   root       - the name of the root directory, which will be disregarded
#                when determining the source file name
#   verboseLog - stored unchanged on the instance; defaults to None
class SingleRunInfo:
    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root = path, root
        self.verboseLog = verboseLog
42
class AnalysisDiagnostic:
    """A single diagnostic read from an analyzer plist report.

    data       - the diagnostic dictionary; it must contain a 'location'
                 entry with 'file', 'line' and 'col' keys, and is expected to
                 carry 'category' and 'description' entries.
    report     - the owning report object; its 'files' list and its 'run'
                 (root, path, getSourceName) are consulted by the accessors.
    htmlReport - name of the matching HTML report inside the run's output
                 directory, or None when there is none.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the diagnostic's source file as named by the owning run.

        The 'file' entry of the location is an index into the report's file
        list; the run's getSourceName() turns that path into the source name.
        """
        return self._report.run.getSourceName(
            self._report.files[self._loc['file']])

    # TODO: This assumes single file!
    def getRelFileName(self):
        """Return the source file name relative to the run's root.

        getSourceName() has already removed the root prefix, so the name is
        returned as is.  Stripping the root a second time (as this method
        used to do) would eat a leading path component that merely happens to
        repeat the root, e.g. root '/r' and file '/r/r/a.c'.
        """
        return self.getFileName()

    def getLine(self):
        """Return the 'line' entry of the diagnostic's location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic's location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this issue across runs.

        The identifier is '<file>+', followed by '<issue_context>+' and
        '<issue_hash>' for whichever of those entries the diagnostic has.
        """
        ident = self.getFileName() + "+"
        if 'issue_context' in self._data:
            ident += self._data['issue_context'] + "+"
        if 'issue_hash' in self._data:
            ident += str(self._data['issue_hash'])
        return ident

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description' for display."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000090
class multidict:
    """Dictionary-like container that keeps every value stored under a key.

    Assigning to a key appends to that key's list instead of replacing it, so
    lookups return the list of all values assigned so far.
    """

    def __init__(self, elts=()):
        # elts is an iterable of (key, value) pairs; each pair goes through
        # item assignment so repeated keys accumulate.
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def __getitem__(self, item):
        # Raises KeyError for an unknown key, like a plain dict.
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the key's list on first use, then append to it.
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000114
class CmpOptions:
    """Plain option holder for a comparison.

    verboseLog - stored unchanged; defaults to None
    rootA      - stored unchanged; defaults to ""
    rootB      - stored unchanged; defaults to ""
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
120
class AnalysisReport:
    """One report of an analysis run.

    run        - the run the report belongs to
    files      - the report's file list
    clang_vers - stored as the 'clang_version' attribute
    """

    def __init__(self, run, files, clang_vers):
        self.clang_version = clang_vers
        self.files = files
        self.run = run
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000126
class AnalysisRun:
    """Accumulates the reports and diagnostics of one analysis run.

    info - an object providing 'path' and 'root' attributes; both are copied
           onto the run and the object itself is kept as 'info'.
    """

    def __init__(self, info):
        self.info = info
        self.path, self.root = info.path, info.root
        # Both lists start empty and are filled by whoever loads the run.
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """Return path with the run's root prefix removed, if it has one."""
        prefix = self.root
        return path[len(prefix):] if path.startswith(prefix) else path
139
# Backward compatibility API: wraps the arguments in a SingleRunInfo (taking
# the verbose log setting from opts) and defers to loadResultsFromSingleRun.
def loadResults(path, opts, root = "", deleteEmpty=True):
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
144
# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
#
# Returns an AnalysisRun holding one AnalysisReport per non-empty plist file
# and the AnalysisDiagnostic objects of all of them.
def loadResultsFromSingleRun(info, deleteEmpty=True):
    path = info.path
    run = AnalysisRun(info)

    for f in os.listdir(path):
        if not f.endswith('plist'):
            continue

        p = os.path.join(path, f)
        # plistlib.readPlist() no longer exists in recent Python 3 releases;
        # use plistlib.load() where the module provides it.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist.  The first diagnostic
        # decides whether the report carries HTML file names; a report
        # without any diagnostics simply has none.
        rawDiagnostics = data['diagnostics']
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'),
                                data.pop('clang_version'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Every top-level key of the plist must have been consumed above.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
187
def cmpAnalysisDiagnostic(d) :
    """Sort/match key of a diagnostic: its issue identifier."""
    return d.getIssueIdentifier()

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, followed by the diagnostics found only in A and
    then the diagnostics found only in B.
    """

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        # Both lists are sorted ascending, so pop() yields the largest
        # remaining key of each run.
        a = eltsA.pop()
        b = eltsB.pop()
        keyA = cmpAnalysisDiagnostic(a)
        keyB = cmpAnalysisDiagnostic(b)
        if keyA == keyB:
            res.append((a, b, 0))
        elif keyA > keyB:
            # The ordering test must use the same key the lists were sorted
            # by: a is larger than everything left in B, so it has no
            # partner there.  Put b back for the next round.
            neqA.append(a)
            eltsB.append(b)
        else:
            # Symmetric case: b has no partner in A.
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
237
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the analyzer results in dirA and dirB.

    dirA, dirB  - the two analysis output directories to compare
    opts        - options object providing rootA, rootB and verboseLog; when
                  verboseLog is set, a second, more detailed record of every
                  difference is written to that file
    deleteEmpty - forwarded to loadResults

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.  Text mode, because only text is
    # written to it.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    # The try/finally guarantees the log is closed even if reporting fails.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport())
                                 + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport())
                                 + "\n")
            elif confidence:
                # Only reached for a matched pair with a non-zero confidence.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport())
                                 + "\n")
            # A matched pair with confidence 0 is identical: nothing to do.

        # The total counts the diagnostics of run B.
        totalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % totalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % totalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
286
def main():
    """Command-line entry point: compare the results in two directories."""
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")

    # The two root options differ only in their name and help text.
    rootOptions = (
        ("--rootA", "rootA",
         "Prefix to ignore on source files for directory A"),
        ("--rootB", "rootB",
         "Prefix to ignore on source files for directory B"),
    )
    for flag, destName, helpText in rootOptions:
        parser.add_option("", flag, dest=destName, help=helpText,
                          action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")

    opts, args = parser.parse_args()

    # Exactly the two result directories must be given.
    if len(args) != 2:
        parser.error("invalid number of arguments")
    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()