blob: f2961cf0acf5ab30c803fc003702970e01da5016 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    # root - the name of the root directory, which will be disregarded when
    # determining the source file name
    #
    resultsA = loadResults(dirA, opts, root, deleteEmpty)
    resultsB = loadResults(dirB, opts, root, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

Daniel Dunbar301f7ac2009-08-06 21:15:33 +000030"""
31
32import os
33import plistlib
34
35#
class AnalysisDiagnostic:
    """One diagnostic entry taken from an analyzer plist report.

    data       -- mapping for this diagnostic. It must contain 'location'
                  (with 'file', 'line' and 'col'); 'category' and
                  'description' are read by the matching accessors, and
                  'issue_context' / 'issue_hash' are used when present.
    report     -- the owning report object; its 'files' sequence and its
                  'run' (providing getSourceName() and 'path') are used.
    htmlReport -- name of the HTML report for this diagnostic, or None.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name as mapped by the run's getSourceName()."""
        # The location stores an index into the report's list of files.
        return self._report.run.getSourceName(
            self._report.files[self._loc['file']])

    def getLine(self):
        """Return the 'line' entry of the diagnostic location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the string used to match this diagnostic across runs.

        The result is '<issue_context>:' (if present), then '<issue_hash>:'
        (if present), then ':' followed by the file name.
        """
        parts = []
        if 'issue_context' in self._data:
            parts.append(self._data['issue_context'] + ":")
        if 'issue_hash' in self._data:
            parts.append(str(self._data['issue_hash']) + ":")
        # The extra ':' before the file name is part of the established
        # identifier format and is kept unchanged.
        parts.append(":" + self.getFileName())
        return ''.join(parts)

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        """Return 'file:line:col, category: description' for display."""
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000075
class multidict:
    """Dictionary wrapper that keeps every value assigned to a key.

    Assigning to a key appends to that key's list instead of replacing it,
    so lookups always return a list of values.
    """

    def __init__(self, elts=()):
        # Populate through __setitem__ so repeated keys accumulate.
        self.data = {}
        for pair in elts:
            name, entry = pair
            self[name] = entry

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Create the list on first use, then append to it.
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
99
100#
101
class CmpOptions:
    """Plain holder for the comparison settings.

    verboseLog -- path of an extra log file, or None for no log.
    rootA      -- path prefix stored for the first run.
    rootB      -- path prefix stored for the second run.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
107
class AnalysisReport:
    """Pairs a run with the list of files named by one report."""

    def __init__(self, run, files):
        self.files = files
        self.run = run
112
class AnalysisRun:
    """Collects the reports and diagnostics loaded from one results directory.

    path -- directory the results were loaded from.
    root -- prefix removed from source file names by getSourceName().
    opts -- options object stored on the run.
    """

    def __init__(self, path, root, opts):
        self.path, self.root, self.opts = path, root, opts
        self.reports = []
        self.diagnostics = []

    def getSourceName(self, path):
        """Return 'path' with the leading root prefix removed, if present."""
        prefix = self.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
125
def loadResults(path, opts, root="", deleteEmpty=True):
    """Load every 'report*plist' file in 'path' into a new AnalysisRun.

    path        -- directory containing the plist reports.
    opts        -- options object stored on the resulting run.
    root        -- prefix to strip from source file names.
    deleteEmpty -- when true, plist files that list no source files are
                   removed from disk; they are skipped either way.

    Returns the AnalysisRun holding the loaded reports and diagnostics.
    Raises AssertionError if a plist contains keys other than 'files' and
    'diagnostics', or if a diagnostic does not have exactly one HTML file
    when HTML files are present.
    """
    run = AnalysisRun(path, root, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        # plistlib.readPlist() no longer exists on current Python 3, so use
        # plistlib.load() whenever the running interpreter provides it.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides whether HTML files are expected; guard against a report
        # that lists files but has no diagnostics at all.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Everything in the plist should have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
165
def cmpAnalysisDiagnostic(d):
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    identifier = d.getIssueIdentifier()
    return identifier
Anna Zaks19b17cb2012-06-08 01:50:49 +0000168
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    A and B are run objects exposing a 'diagnostics' list; each diagnostic
    must provide getIssueIdentifier().

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    def issueKey(diag):
        return diag.getIssueIdentifier()

    res = []

    # Quickly eliminate equal elements. Both lists are sorted ascending by
    # issue identifier and consumed from the end, so each step compares the
    # largest identifiers still remaining on either side.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=issueKey)
    eltsB = sorted(B.diagnostics, key=issueKey)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        keyA = issueKey(a)
        keyB = issueKey(b)
        if keyA == keyB:
            res.append((a, b, 0))
        elif keyA > keyB:
            # Nothing left in B can be as large as a, so a has no match.
            # The ordering must use the same key as the sort above.
            neqA.append(a)
            eltsB.append(b)
        else:
            # Nothing left in A can be as large as b, so b has no match.
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
215
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Compare the results in dirA and dirB and print the differences.

    dirA, dirB  -- result directories of the two runs.
    opts        -- options object providing 'rootA', 'rootB' and
                   'verboseLog' (path of an extra log file, or a false value).
    deleteEmpty -- forwarded to loadResults().

    Each added, removed or changed diagnostic is printed to stdout and, when
    a verbose log is configured, also written there together with its report
    path. Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. Text mode keeps the plain string
    # writes below valid on both Python 2 and Python 3.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(), b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(), a.getReport()))
            elif confidence:
                # Only reached for a non-zero confidence; a confidence of 0
                # means the two diagnostics are considered equal.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))

        # The total is the number of diagnostics in the second run.
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
    finally:
        # Always release the log file, even if comparison or output fails.
        if auxLog:
            auxLog.close()

    return foundDiffs
264
def main():
    """Parse the command line and print the diff of two result directories."""
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "", "--rootA", dest="rootA",
        help="Prefix to ignore on source files for directory A",
        action="store", type=str, default="")
    parser.add_option(
        "", "--rootB", dest="rootB",
        help="Prefix to ignore on source files for directory B",
        action="store", type=str, default="")
    parser.add_option(
        "", "--verbose-log", dest="verboseLog",
        help="Write additional information to LOG [default=None]",
        action="store", type=str, default=None,
        metavar="LOG")
    opts, args = parser.parse_args()

    # Exactly two positional arguments are required: the two directories.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    firstDir, secondDir = args
    dumpScanBuildResultsDiff(firstDir, secondDir, opts)


if __name__ == '__main__':
    main()