blob: 220045319facf67eacaa64a6c434a8dc829630aa [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """Dictionary-like container that keeps every value assigned to a key.

    Assigning ``m[key] = value`` appends ``value`` to the list held under
    ``key`` (creating the list on first use); ``m[key]`` returns that list.
    """

    def __init__(self, elts=()):
        # Feed the initial (key, value) pairs through __setitem__ so that
        # repeated keys accumulate instead of overwriting each other.
        self.data = {}
        for pair in elts:
            name, entry = pair
            self[name] = entry

    def __getitem__(self, item):
        # Raises KeyError for unknown keys, like a plain dict.
        return self.data[item]

    def __setitem__(self, key, value):
        # Append to the existing list, or start a new one-element list.
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        # Number of distinct keys, not number of stored values.
        return len(self.data)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()

    def get(self, key, default=None):
        # Returns the list stored under key, or default when key is absent.
        return self.data.get(key, default)
44
45#
46
class CmpOptions:
    """Plain holder for the two comparison options.

    verboseLog -- value stored as ``self.verboseLog`` (defaults to None)
    root       -- value stored as ``self.root`` (defaults to "")
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
51
class AnalysisReport:
    """Pairs a report's ``files`` entry with the run it was loaded into."""

    def __init__(self, run, files):
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """A single diagnostic together with its report and optional HTML file.

    data       -- the diagnostic dictionary
    report     -- the owning report (provides ``files`` and ``run``)
    htmlReport -- HTML report file name relative to the run path, or None
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return 'file:line:col, category: description' for this diagnostic."""
        location = self.data['location']
        # The location stores an index into the report's file list.
        sourcePath = self.report.files[location['file']]

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d, %s: %s' % (
            self.report.run.getSourceName(sourcePath),
            location['line'],
            location['col'],
            self.data['category'],
            self.data['description'])

    def getReportData(self):
        """Return the path of the HTML report, or " " when there is none."""
        if self.htmlReport is not None:
            return os.path.join(self.report.run.path, self.htmlReport)
            # We could also dump the report with:
            # return open(os.path.join(self.report.run.path,
            #                          self.htmlReport), "rb").read()
        return " "
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000084
class AnalysisRun:
    """Everything collected from one results directory.

    path        -- the directory the run was created for
    opts        -- options object; ``opts.root`` is read by getSourceName
    reports     -- list that callers append reports to (starts empty)
    diagnostics -- list that callers extend with diagnostics (starts empty)
    """

    def __init__(self, path, opts):
        self.path = path
        self.opts = opts
        self.reports = []
        self.diagnostics = []

    def getSourceName(self, path):
        """Return ``path`` with a leading ``opts.root`` prefix removed."""
        prefix = self.opts.root
        # Paths that do not start with the prefix are returned untouched.
        return path[len(prefix):] if path.startswith(prefix) else path
96
def loadResults(path, opts, deleteEmpty=True):
    """Load every 'report*plist' file in ``path`` into a new AnalysisRun.

    path        -- directory to scan
    opts        -- options object handed to the AnalysisRun
    deleteEmpty -- when true, plist files whose 'files' entry is empty are
                   removed from disk; such files are skipped either way

    Returns the populated AnalysisRun.

    Raises AssertionError when a diagnostic does not list exactly one HTML
    file, or when a plist contains keys other than 'files' and
    'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        # NOTE: plistlib.readPlist is the Python 2 era API (removed in
        # Python 3.9); kept because this file targets Python 2.
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            # Plain truthiness so that any true flag value is honoured.
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist.  The first diagnostic
        # decides for the whole file; the emptiness check avoids an
        # IndexError on a report that lists files but no diagnostics.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every top-level key must have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
136
def getIssueIdentifier(d):
    """Return the matching key for diagnostic ``d``.

    The key is the 'issue_context' entry followed by the string form of the
    'issue_hash' entry of ``d.data``; an entry that is absent contributes
    nothing, so the key is '' when both are missing.
    """
    info = d.data
    context = info['issue_context'] if 'issue_context' in info else ''
    hashText = str(info['issue_hash']) if 'issue_hash' in info else ''
    return '' + context + hashText
144
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Diagnostics are matched on getIssueIdentifier(); matched pairs come first
    in the result, followed by the unmatched elements of A and then of B.
    """

    res = []

    # Quickly eliminate equal elements.  Both lists are sorted by the issue
    # identifier and merged from the largest identifier downwards.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=getIssueIdentifier)
    eltsB = sorted(B.diagnostics, key=getIssueIdentifier)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        idA = getIssueIdentifier(a)
        idB = getIssueIdentifier(b)
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # The ordering test must use the same key the lists were sorted
            # by; otherwise the wrong side is discarded and later matches
            # are missed.  Here a is larger than everything left in B, so it
            # has no partner; b goes back for the next round.
            neqA.append(a)
            eltsB.append(b)
        else:
            # Symmetric case: b has no partner left in A.
            neqB.append(b)
            eltsA.append(a)
    # Whatever is left on either side has nothing to be matched against.
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res
191
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """Compare the results in ``dirA`` and ``dirB`` and print the differences.

    dirA, dirB  -- result directories, loaded with loadResults()
    opts        -- options object; ``opts.verboseLog`` names an optional log
                   file that receives one tuple-formatted line per difference
    deleteEmpty -- forwarded to loadResults()

    Prints an ADDED / REMOVED / CHANGED line per difference followed by the
    totals, and returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.  The mode is kept as "wb" so the bytes
    # written are the same as before on every platform.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # The try/finally guarantees the log is closed (and flushed) even when
    # the comparison or the reporting raises.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        # Single-argument print(...) produces the same output as the
        # Python 2 print statement used elsewhere for these messages.
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(),
                                    b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(),
                                    a.getReportData()))
            elif confidence:
                # Only reached for matches with a non-zero confidence.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData()))
            else:
                # Exact match (confidence 0): nothing to report.
                pass

        # The total counts the diagnostics of the second run only.
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
            auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)

        return foundDiffs
    finally:
        if auxLog:
            auxLog.close()
240
def main():
    """Command-line entry point.

    Parses ``--root`` and ``--verbose-log`` plus exactly two positional
    directory arguments, then runs cmpScanBuildResults() on the directories
    with the parsed options.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "", "--root",
        dest="root",
        action="store", type=str, default="",
        help="Prefix to ignore on source files")
    parser.add_option(
        "", "--verbose-log",
        dest="verboseLog",
        action="store", type=str, default=None,
        metavar="LOG",
        help="Write additional information to LOG [default=None]")
    opts, positional = parser.parse_args()

    # Exactly two directories are required; parser.error() reports the
    # problem and exits.
    if len(positional) != 2:
        parser.error("invalid number of arguments")

    cmpScanBuildResults(positional[0], positional[1], opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000259
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()