blob: e68c45df184af086888e991df9d5cc3c4acf1136 [file] [log] [blame]
#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot-like environment.
"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """
    multidict - A minimal mapping from each key to the list of every value
    that has been stored under it.

    Assigning to a key never overwrites: the value is appended to that key's
    list, which is created the first time the key is seen.
    """

    def __init__(self, elts=()):
        # elts is an iterable of (key, value) pairs; each pair is routed
        # through __setitem__ so that repeated keys accumulate.
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def __getitem__(self, item):
        # The list of values stored for item; KeyError if there is none.
        return self.data[item]

    def __setitem__(self, key, value):
        # Append to the key's list, creating the list on first use.
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        # Like __getitem__, but returns default for an unknown key.
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
44
45#
46
class CmpOptions:
    """
    CmpOptions - Plain holder for the settings of a comparison.

    root is the prefix that AnalysisRun.getSourceName strips from source file
    paths; verboseLog is the path cmpScanBuildResults opens for its additional
    log (a false value means no log is written).
    """
    def __init__(self, verboseLog=None, root=""):
        self.verboseLog, self.root = verboseLog, root
51
class AnalysisReport:
    """
    AnalysisReport - One loaded report: the run it belongs to plus the value
    of the report's 'files' entry, which diagnostics index through
    location['file'].
    """
    def __init__(self, run, files):
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """
    AnalysisDiagnostic - A single diagnostic entry of a report, together with
    the report that contained it and the name of its HTML rendering, if any.
    """

    def __init__(self, data, report, htmlReport):
        self.htmlReport = htmlReport
        self.report = report
        self.data = data

    def getReadableName(self):
        """
        Return a one line summary of the diagnostic in the form
        'file:line:col, category: description'.
        """
        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.
        where = self.data['location']
        owner = self.report
        # location['file'] is an index into the report's file table; the run
        # turns the resulting path into the name that gets displayed.
        source = owner.run.getSourceName(owner.files[where['file']])
        fields = (source, where['line'], where['col'],
                  self.data['category'], self.data['description'])
        return '%s:%d:%d, %s: %s' % fields

    def getReportData(self):
        """
        Return the path of the HTML report inside the run directory, or a
        single space when this diagnostic has no HTML report.
        """
        # We could also dump the report with:
        #   return open(os.path.join(self.report.run.path,
        #                            self.htmlReport), "rb").read()
        if self.htmlReport is not None:
            return os.path.join(self.report.run.path, self.htmlReport)
        return " "
Daniel Dunbar301f7ac2009-08-06 21:15:33 +000084
class AnalysisRun:
    """
    AnalysisRun - The collected results of one analyzer output directory: its
    path, the options it was loaded with, and the reports and diagnostics
    accumulated for it.
    """

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """
        Return path with the opts.root prefix removed; a path that does not
        start with that prefix is returned unchanged.
        """
        prefix = self.opts.root
        return path[len(prefix):] if path.startswith(prefix) else path
96
def loadResults(path, opts, deleteEmpty=True):
    """
    loadResults - Load every analyzer report found directly inside path.

    Only directory entries whose name starts with 'report' and ends with
    'plist' are read.  A report whose 'files' entry is empty contributes
    nothing and, when deleteEmpty is true, is also removed from disk.

    Returns an AnalysisRun whose reports and diagnostics lists hold what was
    loaded.  opts is stored on the run unchanged.

    Raises AssertionError if a diagnostic carries anything other than exactly
    one HTML file, or if a report has keys besides 'files' and 'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist.  Their presence is decided
        # from the first diagnostic, so a report that lists files but has no
        # diagnostics at all must not be indexed.
        entries = data['diagnostics']
        if entries and 'HTMLDiagnostics_files' in entries[0]:
            htmlFiles = []
            for d in entries:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(entries)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Both known keys have been popped; anything left is unexpected.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
136
def compareResults(A, B):
    """
    compareResults - Pair up the diagnostics of run A with those of run B.

    Returns a list of triples (a, b, confidence).  A triple with both elements
    set and confidence 0 is a diagnostic matched between the runs; a triple
    in which one element is None (and confidence is None) is a diagnostic
    that has no counterpart in the other run.
    """

    def byData(diag):
        return diag.data

    # Work on sorted copies so the runs' own lists are left untouched.
    pendingA = sorted(A.diagnostics, key=byData)
    pendingB = sorted(B.diagnostics, key=byData)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: take the largest remaining entry of
    # each side, pair them when they report the same location, and otherwise
    # set aside the larger one and retry the smaller one.
    while pendingA and pendingB:
        a = pendingA.pop()
        b = pendingB.pop()
        if a.data['location'] == b.data['location']:
            matched.append((a, b, 0))
            continue
        if a.data > b.data:
            onlyA.append(a)
            pendingB.append(b)
        else:
            onlyB.append(b)
            pendingA.append(a)

    # Whatever is still pending on either side was never paired.
    onlyA += pendingA
    onlyB += pendingB

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    matched.extend([(a, None, None) for a in onlyA])
    matched.extend([(None, b, None) for b in onlyB])
    return matched
183
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """
    cmpScanBuildResults - Compare the analyzer results stored in dirA and dirB
    and print a summary of the differences.

    Each diagnostic present only in dirB is printed as ADDED and each one
    present only in dirA as REMOVED, followed by the total number of
    diagnostics in dirB and the number of differences.  When opts.verboseLog
    is set, the same events are also written to that file together with the
    report data of the diagnostics involved.

    deleteEmpty is passed on to loadResults for both directories.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    def log(text):
        # Write one line to the verbose log; a no-op when there is no log.
        if auxLog is not None:
            auxLog.write(text + "\n")

    # The try/finally makes sure the log is closed even if the comparison or
    # the formatting of a diagnostic raises.
    try:
        foundDiffs = 0
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                log("('ADDED', %r, %r)" % (b.getReadableName(),
                                           b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                log("('REMOVED', %r, %r)" % (a.getReadableName(),
                                             a.getReportData()))
            elif confidence:
                # Only reached for a pair matched with a nonzero confidence.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                log("('CHANGED', %r, %r, %r, %r)"
                    % (a.getReadableName(),
                       b.getReadableName(),
                       a.getReportData(),
                       b.getReportData()))

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        log("('TOTAL NEW REPORTS', %r)" % TotalReports)
        log("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        if auxLog is not None:
            auxLog.close()

    return foundDiffs
232
def main():
    """
    main - Command line entry point: parse the options and the two result
    directories, then run the comparison on them.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", action="store", type=str, dest="root",
                      default="",
                      help="Prefix to ignore on source files")
    parser.add_option("", "--verbose-log", action="store", type=str,
                      dest="verboseLog", default=None, metavar="LOG",
                      help="Write additional information to LOG [default=None]")
    opts, positional = parser.parse_args()

    # Exactly two directories are required; parser.error reports the problem
    # and exits.
    if len(positional) != 2:
        parser.error("invalid number of arguments")

    cmpScanBuildResults(positional[0], positional[1], opts)
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000251
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()