blob: 9b8deff061aa4a043079a90128049848b79fded0 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
Anna Zaks7acc4072012-07-16 20:21:42 +000014
15Usage:
16
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
Anna Zaks2a84b8b2012-08-02 00:41:40 +000019 #
Anna Zaks81765572012-10-15 22:48:21 +000020 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
Anna Zaks7acc4072012-07-16 20:21:42 +000022
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of triples (a, b, confidence).
25 diff = compareResults(resultsA, resultsB)
26
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000027"""
28
29import os
30import plistlib
Anna Zaks81765572012-10-15 22:48:21 +000031import CmpRuns
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000032
Anna Zaks81765572012-10-15 22:48:21 +000033# Information about analysis run:
34# path - the analysis output directory
35# root - the name of the root directory, which will be disregarded when
36# determining the source file name
class SingleRunInfo:
    """Describes where the output of a single analysis run lives.

    path       - the analysis output directory
    root       - the name of the root directory, which is disregarded when
                 determining the source file name (defaults to "")
    verboseLog - stored as given (defaults to None)
    """
    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root = path, root
        self.verboseLog = verboseLog
42
class AnalysisDiagnostic:
    """A single diagnostic taken from a report, with its optional HTML report.

    data       - the diagnostic dictionary; its 'location' entry is cached
    report     - the owning report object (its .run and .files are used)
    htmlReport - name of the matching HTML report file, or None
    """
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = data['location']
        self._report = report
        self._htmlReport = htmlReport

    def _sourceName(self):
        # The location stores an index into the report's file table; the run
        # turns the resulting path into a source name.
        report = self._report
        return report.run.getSourceName(report.files[self._loc['file']])

    def getFileName(self):
        """Return the source name of the file this diagnostic points at."""
        return self._sourceName()

    # TODO: This assumes single file!
    def getRelFileName(self):
        """Return the source name with the run's root prefix removed."""
        prefix = self._report.run.root
        name = self._sourceName()
        return name[len(prefix):] if name.startswith(prefix) else name

    def getLine(self):
        """Return the 'line' entry of the location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the location."""
        return self._loc['col']

    def getCategory(self):
        """Return the 'category' entry of the diagnostic."""
        return self._data['category']

    def getDescription(self):
        """Return the 'description' entry of the diagnostic."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is the file name followed by "+", then the issue context
        followed by "+" (when present), then the issue hash (when present).
        """
        pieces = [self.getFileName() + "+"]
        if 'issue_context' in self._data:
            pieces.append(self._data['issue_context'] + "+")
        if 'issue_hash' in self._data:
            pieces.append(str(self._data['issue_hash']))
        return "".join(pieces)

    def getReport(self):
        """Return the HTML report path, or " " when there is no HTML report."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return a one-line "file:line:col, category: description" summary."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000090
class multidict:
    """Mapping in which every key holds the list of values assigned to it.

    Assigning to a key appends to that key's list instead of replacing it.
    """
    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        # Raises KeyError for a key that was never assigned.
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use, then append.
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        # Number of distinct keys, not number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000114
class CmpOptions:
    """Options bundle for comparing two runs.

    verboseLog - stored as given (defaults to None)
    rootA      - root prefix for run A (defaults to "")
    rootB      - root prefix for run B (defaults to "")
    """
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
120
class AnalysisReport:
    """Pairs a run with the file table of one of its reports.

    run   - the run this report belongs to
    files - the report's file table, stored as given
    """
    def __init__(self, run, files):
        self.run, self.files = run, files
125
class AnalysisRun:
    """Accumulates the reports and diagnostics loaded for one analysis run.

    info - object providing .path and .root; it is also kept as .info
    """
    def __init__(self, info):
        self.info = info
        self.path, self.root = info.path, info.root
        self.reports, self.diagnostics = [], []

    def getSourceName(self, path):
        """Return path without the run's root prefix, if it has that prefix."""
        prefix = self.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
138
Anna Zaks81765572012-10-15 22:48:21 +0000139# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Backward-compatible entry point for loading one run's results.

    Wraps path, root and opts.verboseLog in a SingleRunInfo and returns
    whatever loadResultsFromSingleRun produces for it.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
143
# Load the results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Load every plist report found in info.path into a new AnalysisRun.

    info        - the run description; info.path is the directory scanned
    deleteEmpty - when true, plist files whose 'files' list is empty are
                  removed from disk; such files are skipped either way

    Returns the AnalysisRun holding one AnalysisReport per non-empty plist
    and all of their diagnostics.
    """
    path = info.path
    run = AnalysisRun(info)

    for f in os.listdir(path):
        if not f.endswith('plist'):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole file; an empty diagnostics list is guarded
        # against so it yields no diagnostics instead of an IndexError.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every top-level key is expected to have been consumed above.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
186
def cmpAnalysisDiagnostic(d) :
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaks19b17cb2012-06-08 01:50:49 +0000189
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    A and B are runs exposing a 'diagnostics' list; neither list is modified.

    The result is the relation as a list of triples (a, b, confidence) where
    each of a and b is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements. Both lists are sorted by issue
    # identifier and then walked from the largest identifier downwards.
    neqA = []
    neqB = []
    issueKey = lambda d: d.getIssueIdentifier()
    eltsA = sorted(A.diagnostics, key=issueKey)
    eltsB = sorted(B.diagnostics, key=issueKey)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # The ordering test must use the same key the lists were sorted
            # by, otherwise the walk can step past a matching element.
            # 'a' has no counterpart in B; retry 'b' against the next A.
            neqA.append(a)
            eltsB.append(b)
        else:
            # 'b' has no counterpart in A; retry 'a' against the next B.
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
236
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the results stored in dirA and dirB.

    dirA, dirB  - output directories of the two runs
    opts        - options object; rootA, rootB and verboseLog are read
    deleteEmpty - forwarded to loadResults for both directories

    Each added, removed or changed diagnostic is printed, followed by the
    total number of diagnostics in run B and the number of differences.
    When opts.verboseLog is set, matching records are also written to that
    file, which is closed before returning.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The try/finally makes sure the log handle is released even when the
    # comparison or the reporting raises.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()) + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()) + "\n")
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()) + "\n")
            # A pair with confidence 0 is an exact match: nothing to report.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % TotalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs
285
def main():
    """Command-line entry point: parse options and diff two result directories.

    Exactly two positional arguments (dir A and dir B) are required;
    anything else is reported through the option parser's error().
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      action="store", type=str, default="",
                      help="Prefix to ignore on source files for directory A")
    parser.add_option("", "--rootB", dest="rootB",
                      action="store", type=str, default="",
                      help="Prefix to ignore on source files for directory B")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      action="store", type=str, default=None,
                      metavar="LOG",
                      help="Write additional information to LOG [default=None]")
    opts, args = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(args[0], args[1], opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000307
# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()