blob: 6d4be712994e649ed76205da363f12276f2d48b4 [file] [log] [blame]
Daniel Dunbar301f7ac2009-08-06 21:15:33 +00001#!/usr/bin/env python
2
3"""
4CmpRuns - A simple tool for comparing two static analyzer runs to determine
5which reports have been added, removed, or changed.
6
7This is designed to support automated testing using the static analyzer, from
8two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases, for
10 regression testing.
11
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
14"""
15
16import os
17import plistlib
18
19#
20
class multidict:
    """
    multidict - A minimal mapping from each key to the list of every value
    that has been assigned to it.

    Assigning to a key appends to that key's list instead of replacing the
    previous value; lookups hand back the whole list.
    """

    def __init__(self, elts=()):
        self.data = {}
        # Route every pair through __setitem__ so repeated keys accumulate.
        for pair in elts:
            key, value = pair
            self[key] = value

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def __getitem__(self, item):
        # Raises KeyError for a key that was never assigned.
        return self.data[item]

    def __setitem__(self, key, value):
        # Start a fresh list on first use of the key, then append.
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        # Like dict.get: the list stored for key, or default when absent.
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
44
45#
46
class CmpOptions:
    """
    CmpOptions - Plain holder for the settings a comparison reads.

    verboseLog is the path of an additional log file, or None for no log;
    root is a path prefix (empty by default).
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
51
class AnalysisReport:
    """
    AnalysisReport - Pairs the run a report belongs to with the report's
    list of files.
    """

    def __init__(self, run, files):
        self.files = files
        self.run = run
56
class AnalysisDiagnostic:
    """
    AnalysisDiagnostic - A single diagnostic taken from a report.

    data is the diagnostic's dictionary, report is the object the
    diagnostic came from (its 'run' and 'files' attributes are used here),
    and htmlReport is the name of the matching HTML file relative to the
    run's path, or None when there is none.
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """
        Return 'file:line:col, category: description' for this diagnostic.

        The location's 'file' entry is an index into the report's file list;
        the resulting path is passed through the run's getSourceName().
        """
        loc = self.data['location']
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']
        category = self.data['category']
        description = self.data['description']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d, %s: %s' % (filename, line, column, category,
                                     description)

    def getReportData(self):
        """
        Return the raw contents of the HTML report, or " " if there is none.
        """
        if self.htmlReport is None:
            return " "

        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        # Close the file deterministically instead of leaking the handle
        # until the garbage collector gets to it.
        with open(reportPath, "rb") as reportFile:
            return reportFile.read()
83
class AnalysisRun:
    """
    AnalysisRun - The results gathered for one directory: its path, the
    options in effect, and the accumulated reports and diagnostics (both
    start out empty).
    """

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        """
        Return path with the opts.root prefix removed; a path that does not
        start with that prefix is returned unchanged.
        """
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
95
def _readPlist(path):
    # Parse one plist file. plistlib.readPlist does not exist on newer
    # interpreters, so prefer plistlib.load whenever it is available.
    load = getattr(plistlib, 'load', None)
    if load is None:
        return plistlib.readPlist(path)
    with open(path, 'rb') as plistFile:
        return load(plistFile)


def loadResults(path, opts, deleteEmpty=True):
    """
    loadResults - Build an AnalysisRun from the report plists in a directory.

    Every entry of 'path' whose name starts with 'report' and ends with
    'plist' is parsed. A report whose 'files' list is empty is skipped, and
    also removed from disk when deleteEmpty is true. For every other report
    an AnalysisReport and one AnalysisDiagnostic per diagnostic are added to
    the returned run.

    Raises AssertionError if a report has keys other than 'files' and
    'diagnostics', or if a diagnostic does not name exactly one HTML file
    when HTML files are present.
    """
    run = AnalysisRun(path, opts)

    for name in os.listdir(path):
        if not (name.startswith('report') and name.endswith('plist')):
            continue

        p = os.path.join(path, name)
        data = _readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides whether the report carries HTML files; a report without
        # any diagnostics simply has none (instead of raising IndexError).
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Everything the report contained must have been consumed above.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
135
def compareResults(A, B):
    """
    compareResults - Relate the diagnostics of run A to those of run B.

    Returns a list of (a, b, confidence) triples. A matched pair has both
    elements set and confidence 0; a diagnostic found in only one run is
    paired with None and has confidence None. Matched pairs come first,
    followed by the A-only entries and then the B-only entries.
    """

    # Walk both runs from the largest diagnostic downwards, pairing up
    # entries that share a location and setting the rest aside.
    pendingA = sorted(A.diagnostics, key=lambda diag: diag.data)
    pendingB = sorted(B.diagnostics, key=lambda diag: diag.data)
    matched = []
    onlyA = []
    onlyB = []
    while pendingA and pendingB:
        left = pendingA[-1]
        right = pendingB[-1]
        if left.data['location'] == right.data['location']:
            pendingA.pop()
            pendingB.pop()
            matched.append((left, right, 0))
        elif left.data > right.data:
            # Nothing left in B can pair with the larger A entry.
            onlyA.append(pendingA.pop())
        else:
            onlyB.append(pendingB.pop())

    # Whatever remains on either side has no counterpart.
    onlyA.extend(pendingA)
    onlyB.extend(pendingB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    unmatchedA = [(a, None, None) for a in onlyA]
    unmatchedB = [(None, b, None) for b in onlyB]
    return matched + unmatchedA + unmatchedB
182
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """
    cmpScanBuildResults - Compare the results in dirA and dirB and report
    the differences.

    One line is printed for every added, removed, or changed diagnostic,
    followed by the number of diagnostics in dirB and the number of
    differences. When opts.verboseLog is set, matching records (including
    the report data) are also written to that file. deleteEmpty is passed
    on to loadResults for both directories.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    # Single-argument print(...) and write(... + "\n") emit exactly what the
    # equivalent print statements would.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReportData())
                                 + "\n")
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReportData())
                                 + "\n")
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData())
                                 + "\n")

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            auxLog.write("('TOTAL NEW REPORTS', %r)" % TotalReports + "\n")
            auxLog.write("('TOTAL DIFFERENCES', %r)" % foundDiffs + "\n")
    finally:
        # Always flush and release the log, even if reporting fails midway;
        # previously the handle was never closed.
        if auxLog:
            auxLog.close()

    return foundDiffs
231
def main():
    """
    main - Command line entry point.

    Parses --root and --verbose-log plus exactly two positional directory
    arguments, then compares the two directories. Any other number of
    positional arguments is reported through the parser's error handler.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root",
                      action="store", type=str, default="",
                      dest="root",
                      help="Prefix to ignore on source files")
    parser.add_option("", "--verbose-log",
                      action="store", type=str, default=None,
                      dest="verboseLog", metavar="LOG",
                      help="Write additional information to LOG [default=None]")
    opts, args = parser.parse_args()

    if len(args) == 2:
        dirA, dirB = args
        cmpScanBuildResults(dirA, dirB, opts)
    else:
        # parser.error() prints the usage message and exits.
        parser.error("invalid number of arguments")
Daniel Dunbar301f7ac2009-08-06 21:15:33 +0000250
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()