Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | CmpRuns - A simple tool for comparing two static analyzer runs to determine |
| 5 | which reports have been added, removed, or changed. |
| 6 | |
| 7 | This is designed to support automated testing using the static analyzer, from |
| 8 | two perspectives: |
| 9 | 1. To monitor changes in the static analyzer's reports on real code bases, for |
| 10 | regression testing. |
| 11 | |
| 12 | 2. For use by end users who want to integrate regular static analyzer testing |
| 13 | into a buildbot like environment. |
Anna Zaks | 7acc407 | 2012-07-16 20:21:42 +0000 | [diff] [blame] | 14 | |
| 15 | Usage: |
| 16 | |
| 17 | # Load the results of both runs, to obtain lists of the corresponding |
| 18 | # AnalysisDiagnostic objects. |
Anna Zaks | 2a84b8b | 2012-08-02 00:41:40 +0000 | [diff] [blame] | 19 | # |
Anna Zaks | 8176557 | 2012-10-15 22:48:21 +0000 | [diff] [blame] | 20 | resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty) |
| 21 | resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty) |
Anna Zaks | 7acc407 | 2012-07-16 20:21:42 +0000 | [diff] [blame] | 22 | |
| 23 | # Generate a relation from diagnostics in run A to diagnostics in run B |
| 24 | # to obtain a list of triples (a, b, confidence). |
| 25 | diff = compareResults(resultsA, resultsB) |
| 26 | |
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 27 | """ |
| 28 | |
| 29 | import os |
| 30 | import plistlib |
Anna Zaks | 8176557 | 2012-10-15 22:48:21 +0000 | [diff] [blame] | 31 | import CmpRuns |
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 32 | |
class SingleRunInfo:
    """Information about a single analysis run.

    path       - the analysis output directory
    root       - the name of the root directory, which will be disregarded
                 when determining the source file name
    verboseLog - stored unchanged; this class does not interpret it
    """
    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root, self.verboseLog = path, root, verboseLog
| 42 | |
class AnalysisDiagnostic:
    """One analyzer diagnostic plus the report it was loaded from.

    data       - the diagnostic's dictionary; must contain 'location', and
                 the accessors below read 'category', 'description' and the
                 optional 'issue_context' / 'issue_hash' entries
    report     - owning report; its 'files' table and 'run' are consulted
    htmlReport - name of the HTML report relative to the run path, or None
    """
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._report = report
        self._htmlReport = htmlReport
        self._loc = data['location']

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        prefix = self._report.run.root
        # The location stores an index into the report's file table.
        name = self._report.files[self._loc['file']]
        return name[len(prefix):] if name.startswith(prefix) else name

    def getLine(self):
        """Return the 'line' entry of the diagnostic's location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic's location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' entry."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' entry."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is "<file>+" followed by "<issue_context>+" and the
        stringified issue hash, each only when present in the data.
        """
        pieces = [self.getFileName(), "+"]
        if 'issue_context' in self._data:
            pieces.extend((self._data['issue_context'], "+"))
        if 'issue_hash' in self._data:
            pieces.append(str(self._data['issue_hash']))
        return "".join(pieces)

    def getReport(self):
        """Return the path of the HTML report, or " " when there is none."""
        html = self._htmlReport
        if html is not None:
            return os.path.join(self._report.run.path, html)
        return " "

    def getReadableName(self):
        """Return "file:line:col, category: description" for display."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the underlying diagnostic dictionary."""
        return self._data
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 91 | |
class multidict:
    """Mapping in which every key holds a list of all values assigned to it.

    elts - optional iterable of (key, value) pairs used to seed the mapping
    """
    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        """Return the list of values recorded for item (KeyError if none)."""
        return self.data[item]

    def __setitem__(self, key, value):
        """Append value to key's list, creating the list on first use."""
        self.data.setdefault(key, []).append(value)

    def __len__(self):
        """Return the number of distinct keys."""
        return len(self.data)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()

    def get(self, key, default=None):
        """Return key's list of values, or default when key is absent."""
        return self.data.get(key, default)
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 115 | |
class CmpOptions:
    """Options for comparing two runs.

    verboseLog - stored unchanged (defaults to None)
    rootA      - stored unchanged (defaults to "")
    rootB      - stored unchanged (defaults to "")
    """
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.verboseLog = verboseLog
        self.rootA, self.rootB = rootA, rootB
| 121 | |
class AnalysisReport:
    """A single loaded report.

    run   - the run object this report belongs to
    files - the report's file table
    The 'diagnostics' list starts empty and is filled in by the loader.
    """
    def __init__(self, run, files):
        self.diagnostics = []
        self.files = files
        self.run = run
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 127 | |
class AnalysisRun:
    """Accumulates the reports and diagnostics loaded for one analysis run.

    info - run description; its 'path' and 'root' attributes are copied onto
           this object and the object itself is kept as 'info'
    """
    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the recorded clang version, or None if none was seen."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load one plist file and append its report and diagnostics.

        p           - path of the plist file to read
        deleteEmpty - when true, a file whose 'files' list is empty is
                      removed from disk; such files never add a report
        """
        # Newer Python releases dropped plistlib.readPlist in favour of
        # plistlib.load; use whichever this interpreter provides.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        # The key is always removed so the final "assert not data" holds.
        version = data.pop('clang_version', None)
        if self.clang_version is None:
            self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The list is checked for
        # emptiness first so that a report with files but no diagnostics
        # does not fail on the [0] lookup.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every known top-level key has been consumed by now; anything left
        # is a key this loader does not understand.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)
| 180 | |
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 181 | |
# Backward compatibility API.
def loadResults(path, opts, root = "", deleteEmpty=True):
    """Load the results found at path by wrapping the arguments in a
    SingleRunInfo (taking verboseLog from opts) and delegating to
    loadResultsFromSingleRun."""
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)
| 186 | |
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Load the results of the analyses from a given output location.

    info        - the SingleRunInfo object; info.path may name either a
                  single file or a directory that is walked recursively
    deleteEmpty - specifies if the empty plist files should be deleted

    Returns the populated AnalysisRun.
    """
    run = AnalysisRun(info)
    location = info.path

    if not os.path.isfile(location):
        # Directory: read every file whose name ends in 'plist'.
        for dirpath, _dirnames, filenames in os.walk(location):
            for name in filenames:
                if name.endswith('plist'):
                    run.readSingleFile(os.path.join(dirpath, name),
                                       deleteEmpty)
        return run

    run.readSingleFile(location, deleteEmpty)
    return run
| 205 | |
def cmpAnalysisDiagnostic(d) :
    """Sort key for diagnostics: the diagnostic's issue identifier."""
    issueIdentifier = d.getIssueIdentifier()
    return issueIdentifier
Anna Zaks | 19b17cb | 2012-06-08 01:50:49 +0000 | [diff] [blame] | 208 | |
def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, followed by the diagnostics found only in A and
    then those found only in B.
    """

    # Order both runs by issue identifier; the sort is stable, so diagnostics
    # sharing an identifier keep their original relative order.
    issueKey = lambda diag: diag.getIssueIdentifier()
    sortedA = sorted(A.diagnostics, key=issueKey)
    sortedB = sorted(B.diagnostics, key=issueKey)

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements by walking both lists from their
    # largest identifier downwards.
    idxA = len(sortedA) - 1
    idxB = len(sortedB) - 1
    while idxA >= 0 and idxB >= 0:
        a = sortedA[idxA]
        b = sortedB[idxB]
        if a.getIssueIdentifier() == b.getIssueIdentifier():
            matched.append((a, b, 0))
            idxA -= 1
            idxB -= 1
        elif a.getIssueIdentifier() > b.getIssueIdentifier():
            # Nothing left in B can equal a: it is unmatched.
            onlyA.append(a)
            idxA -= 1
        else:
            # Nothing left in A can equal b: it is unmatched.
            onlyB.append(b)
            idxB -= 1

    # Whatever was not visited on either side has no counterpart.
    onlyA.extend(sortedA[:idxA + 1])
    onlyB.extend(sortedB[:idxB + 1])

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend((a, None, None) for a in onlyA)
    res.extend((None, b, None) for b in onlyB)
    return res
| 255 | |
def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between two analyzer runs.

    dirA, dirB  - locations of the two runs, passed to loadResults
    opts        - options object; rootA, rootB and verboseLog are read
    deleteEmpty - forwarded to loadResults

    One line is printed per added, removed or changed diagnostic, followed
    by the totals. When opts.verboseLog is set, matching tuple-formatted
    lines are also written to that file. Returns the number of differences.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    def writeLog(text):
        # The log is opened in binary mode, so encode before writing. Every
        # line passed here is built from %r output.
        auxLog.write((text + "\n").encode("utf-8"))

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                if auxLog:
                    writeLog("('ADDED', %r, %r)" % (b.getReadableName(),
                                                    b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                if auxLog:
                    writeLog("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                      a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                if auxLog:
                    writeLog("('CHANGED', %r, %r, %r, %r)"
                             % (a.getReadableName(),
                                b.getReadableName(),
                                a.getReport(),
                                b.getReport()))
            # Otherwise the pair matched exactly; nothing to report.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        if auxLog:
            writeLog("('TOTAL NEW REPORTS', %r)" % TotalReports)
            writeLog("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        # Close the log even if reporting fails part-way through, so the
        # handle is not leaked and buffered lines reach the file.
        if auxLog:
            auxLog.close()

    return foundDiffs
| 304 | |
def main():
    """Command-line entry point: parse the options and the two run
    locations, then print the differences between the runs."""
    import optparse

    parser = optparse.OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option(
        "--rootA", dest="rootA", action="store", type="string", default="",
        help="Prefix to ignore on source files for directory A")
    parser.add_option(
        "--rootB", dest="rootB", action="store", type="string", default="",
        help="Prefix to ignore on source files for directory B")
    parser.add_option(
        "--verbose-log", dest="verboseLog", action="store", type="string",
        default=None, metavar="LOG",
        help="Write additional information to LOG [default=None]")
    opts, args = parser.parse_args()

    # Exactly two positional arguments are required: the two runs.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(args[0], args[1], opts)
Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 326 | |
# Run the command-line comparison only when this file is executed as a
# script; importing it as a module just makes the definitions available.
if __name__ == '__main__':
    main()