Daniel Dunbar | 301f7ac | 2009-08-06 21:15:33 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
| 3 | """ |
| 4 | CmpRuns - A simple tool for comparing two static analyzer runs to determine |
| 5 | which reports have been added, removed, or changed. |
| 6 | |
| 7 | This is designed to support automated testing using the static analyzer, from |
| 8 | two perspectives: |
| 9 | 1. To monitor changes in the static analyzer's reports on real code bases, for |
| 10 | regression testing. |
| 11 | |
| 12 | 2. For use by end users who want to integrate regular static analyzer testing |
| 13 | into a buildbot-like environment. |
| 14 | """ |
| 15 | |
| 16 | import os |
| 17 | import plistlib |
| 18 | |
| 19 | # |
| 20 | |
class multidict:
    """
    multidict - A mapping in which every key owns a list of values.

    Assigning to a key never overwrites what is already stored; the new value
    is appended to the list kept for that key, and the list is created the
    first time the key is seen. Lookups return the whole list.
    """

    def __init__(self, elts=()):
        # Every (key, value) pair goes through __setitem__, so keys that
        # repeat in elts accumulate their values in order.
        self.data = {}
        for pair in elts:
            name, entry = pair
            self[name] = entry

    def __len__(self):
        # Counts distinct keys, not the values stored under them.
        return len(self.data)

    def __getitem__(self, item):
        # Unknown keys raise KeyError, exactly like the underlying dict.
        return self.data[item]

    def __setitem__(self, key, value):
        # Append to the key's list, creating the list on first use.
        self.data.setdefault(key, []).append(value)

    def get(self, key, default=None):
        # Like dict.get: the list stored for key, or default if absent.
        return self.data.get(key, default)

    def keys(self):
        return self.data.keys()

    def values(self):
        return self.data.values()

    def items(self):
        return self.data.items()
| 44 | |
| 45 | # |
| 46 | |
class CmpOptions:
    """
    CmpOptions - Plain holder for the settings of a comparison.

    verboseLog is the path of an optional extra log file (None disables it)
    and root is a path prefix that is removed from reported source names.
    """

    def __init__(self, verboseLog=None, root=""):
        self.verboseLog = verboseLog
        self.root = root
| 51 | |
class AnalysisReport:
    """
    AnalysisReport - One loaded report file of an analysis run.

    run is the run object the report belongs to and files is the report's
    file table, which diagnostics index into to find their source file.
    """

    def __init__(self, run, files):
        self.files = files
        self.run = run
| 56 | |
class AnalysisDiagnostic:
    """
    AnalysisDiagnostic - A single diagnostic taken from a report.

    data is the diagnostic's dictionary; its 'location' entry must provide
    'file' (an index into report.files), 'line' and 'col'. report is the
    owning report object and htmlReport is the name of the matching HTML
    file inside the run directory, or None when there is none.
    """

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic's position as 'file:line:col'."""
        loc = self.data['location']
        # The location stores an index into the report's file table; the run
        # then turns that path into the name to display.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """
        Return the raw contents of the HTML report, read in binary mode.

        A fixed placeholder string is returned when the diagnostic has no
        HTML report attached.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        # Use a context manager so the handle is closed even if read() fails,
        # instead of relying on garbage collection.
        with open(reportPath, "rb") as reportFile:
            return reportFile.read()
| 80 | |
class AnalysisRun:
    """
    AnalysisRun - The results collected from one analyzer output directory.

    path is the directory the run was read from and opts supplies the
    comparison options (only opts.root is used here). reports and
    diagnostics start out empty and are filled in by whoever loads the run.
    """

    def __init__(self, path, opts):
        self.opts = opts
        self.path = path
        self.diagnostics = []
        self.reports = []

    def getSourceName(self, path):
        # Drop the configured root prefix when the path begins with it;
        # any other path is returned untouched.
        prefix = self.opts.root
        if not path.startswith(prefix):
            return path
        return path[len(prefix):]
| 92 | |
def loadResults(path, opts, deleteEmpty=True):
    """
    loadResults - Build an AnalysisRun from the plist reports found in path.

    Only directory entries whose name starts with 'report' and ends with
    'plist' are read. A report whose 'files' entry is empty is skipped, and
    also removed from disk when deleteEmpty is true. Each remaining report
    must contain exactly the keys 'files' and 'diagnostics'.

    Returns the AnalysisRun holding every loaded report and diagnostic.
    """
    run = AnalysisRun(path, opts)

    # plistlib.readPlist no longer exists on recent Python 3 releases, while
    # plistlib.load is missing on Python 2; use whichever one is available.
    plistLoad = getattr(plistlib, 'load', None)

    # Sorted so reports are loaded in the same order on every file system.
    for f in sorted(os.listdir(path)):
        if not (f.startswith('report') and f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        if plistLoad is not None:
            with open(p, 'rb') as plistFile:
                data = plistLoad(plistFile)
        else:
            data = plistlib.readPlist(p)

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            continue

        # Extract the HTML reports, if they exist. The key is removed from
        # each diagnostic so it does not take part in later comparisons, and
        # each diagnostic is handled on its own so an empty or mixed list of
        # diagnostics does not raise.
        htmlFiles = []
        for d in data['diagnostics']:
            htmlNames = d.pop('HTMLDiagnostics_files', None)
            if htmlNames is None:
                htmlFiles.append(None)
                continue
            # FIXME: Why is this named files, when does it have multiple
            # files?
            assert len(htmlNames) == 1
            htmlFiles.append(htmlNames[0])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'), htmlFiles)]

        # Anything still left in the plist is a key this loader does not
        # understand.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run
| 132 | |
| 133 | def compareResults(A, B): |
| 134 | """ |
| 135 | compareResults - Generate a relation from diagnostics in run A to |
| 136 | diagnostics in run B. |
| 137 | |
| 138 | The result is the relation as a list of triples (a, b, confidence) where |
| 139 | each element {a,b} is None or an element from the respective run, and |
| 140 | confidence is a measure of the match quality (where 0 indicates equality, |
| 141 | and None is used if either element is None). |
| 142 | """ |
| 143 | |
| 144 | res = [] |
| 145 | |
| 146 | # Quickly eliminate equal elements. |
| 147 | neqA = [] |
| 148 | neqB = [] |
| 149 | eltsA = list(A.diagnostics) |
| 150 | eltsB = list(B.diagnostics) |
| 151 | eltsA.sort(key = lambda d: d.data) |
| 152 | eltsB.sort(key = lambda d: d.data) |
| 153 | while eltsA and eltsB: |
| 154 | a = eltsA.pop() |
| 155 | b = eltsB.pop() |
| 156 | if a.data == b.data: |
| 157 | res.append((a, b, 0)) |
| 158 | elif a.data > b.data: |
| 159 | neqA.append(a) |
| 160 | eltsB.append(b) |
| 161 | else: |
| 162 | neqB.append(b) |
| 163 | eltsA.append(a) |
| 164 | neqA.extend(eltsA) |
| 165 | neqB.extend(eltsB) |
| 166 | |
| 167 | # FIXME: Add fuzzy matching. One simple and possible effective idea would be |
| 168 | # to bin the diagnostics, print them in a normalized form (based solely on |
| 169 | # the structure of the diagnostic), compute the diff, then use that as the |
| 170 | # basis for matching. This has the nice property that we don't depend in any |
| 171 | # way on the diagnostic format. |
| 172 | |
| 173 | for a in neqA: |
| 174 | res.append((a, None, None)) |
| 175 | for b in neqB: |
| 176 | res.append((None, b, None)) |
| 177 | |
| 178 | return res |
| 179 | |
def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    """
    cmpScanBuildResults - Compare two analyzer result directories and print
    the differences.

    Prints one ADDED / REMOVED / CHANGED line per differing diagnostic and a
    final TOTAL REPORTS line with the number of diagnostics in dirB. When
    opts.verboseLog is set, a matching tuple-formatted line that also
    carries the report data is written to that file. deleteEmpty is passed
    on to loadResults for both directories.

    Returns True if at least one difference was reported, False otherwise.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, deleteEmpty)
    resultsB = loadResults(dirB, opts, deleteEmpty)

    # Open the verbose log, if given. Text mode is used because every entry
    # is a formatted text line.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    foundDiffs = False
    # try/finally guarantees the log is closed even when reading report data
    # or formatting a name fails part way through.
    try:
        # print(...) with a single argument and an explicit write() behave
        # the same under Python 2 as the print statements they replace, and
        # also parse under Python 3.
        for a, b, confidence in compareResults(resultsA, resultsB):
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs = True
                if auxLog:
                    auxLog.write("('ADDED', %r, %r)\n"
                                 % (b.getReadableName(),
                                    b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs = True
                if auxLog:
                    auxLog.write("('REMOVED', %r, %r)\n"
                                 % (a.getReadableName(),
                                    a.getReportData()))
            elif confidence:
                # Both sides are present with a non-zero confidence, i.e.
                # an inexact match. Exact matches (confidence 0) are not
                # reported at all.
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs = True
                if auxLog:
                    auxLog.write("('CHANGED', %r, %r, %r, %r)\n"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReportData(),
                                    b.getReportData()))

        total = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % total)
        if auxLog:
            auxLog.write("('TOTAL REPORTS', %r)\n" % total)
    finally:
        if auxLog is not None:
            auxLog.close()

    return foundDiffs
| 225 | |
def main():
    """
    main - Command line entry point.

    Expects exactly two positional arguments, the result directories to
    compare, plus the optional --root and --verbose-log settings, and hands
    them to cmpScanBuildResults. Any other argument count ends the program
    through the parser's error handling.
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("--root",
                      dest="root",
                      action="store",
                      type=str,
                      default="",
                      help="Prefix to ignore on source files")
    parser.add_option("--verbose-log",
                      dest="verboseLog",
                      action="store",
                      type=str,
                      default=None,
                      metavar="LOG",
                      help="Write additional information to LOG [default=None]")
    opts, args = parser.parse_args()

    # parser.error() prints the usage message and exits the process.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args
    cmpScanBuildResults(dirA, dirB, opts)


# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()