Blame - clang/utils/analyzer/CmpRuns.py - toolchain/llvm-project

2009-08-06 21:15:33 +0000

[diff] [blame]

1

#!/usr/bin/env python

2

3

"""

4

CmpRuns - A simple tool for comparing two static analyzer runs to determine

5

which reports have been added, removed, or changed.

6

7

This is designed to support automated testing using the static analyzer, from

8

two perspectives:

9

1. To monitor changes in the static analyzer's reports on real code bases, for

10

regression testing.

11

12

2. For use by end users who want to integrate regular static analyzer testing

13

into a buildbot like environment.

Anna Zaks

2012-07-16 20:21:42 +0000

[diff] [blame]

Usage:

# Load the results of both runs, to obtain lists of the corresponding

18

# AnalysisDiagnostic objects.

Anna Zaks

2012-08-02 00:41:40 +0000

[diff] [blame]

19

#

Anna Zaks

2012-10-15 22:48:21 +0000

[diff] [blame]

20

resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)

21

resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

Anna Zaks

2012-07-16 20:21:42 +0000

[diff] [blame]

22

23

# Generate a relation from diagnostics in run A to diagnostics in run B

24

# to obtain a list of triples (a, b, confidence).

25

diff = compareResults(resultsA, resultsB)

26

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

"""

import os

import plistlib

Anna Zaks

2012-10-15 22:48:21 +0000

[diff] [blame]

31

import CmpRuns

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

32

Anna Zaks

2012-10-15 22:48:21 +0000

[diff] [blame]

33

# Information about analysis run:

34

# path - the analysis output directory

35

# root - the name of the root directory, which will be disregarded when

36

# determining the source file name

37

class SingleRunInfo:
    """Plain record describing one analysis run.

    path       -- the analysis output directory.
    root       -- the name of the root directory, which is disregarded when
                  determining the source file name.
    verboseLog -- stored as given; this class does not interpret it.
    """

    def __init__(self, path, root="", verboseLog=None):
        self.path, self.root, self.verboseLog = path, root, verboseLog

42

Anna Zaks

2012-07-16 20:21:42 +0000

[diff] [blame]

43

class AnalysisDiagnostic:
    """One diagnostic entry of a report, with accessors over its raw data.

    data       -- the diagnostic's dictionary; must contain a 'location' entry.
    report     -- the owning report; its 'files' table and 'run' are consulted.
    htmlReport -- name of the matching HTML report, or None if there is none.
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name, minus the run's root prefix if present."""
        prefix = self._report.run.root
        name = self._report.files[self._loc['file']]
        return name[len(prefix):] if name.startswith(prefix) else name

    def getLine(self):
        """Return the 'line' field of the diagnostic's location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' field of the diagnostic's location."""
        return self._loc['col']

    def getCategory(self):
        """Return the diagnostic's 'category' field."""
        return self._data['category']

    def getDescription(self):
        """Return the diagnostic's 'description' field."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Build the string used to match this diagnostic across runs.

        The identifier is the file name followed by "+", then the issue
        context followed by "+" (when present), then the issue hash (when
        present).
        """
        ident = self.getFileName() + "+"
        raw = self._data
        if 'issue_context' in raw:
            ident = ident + raw['issue_context'] + "+"
        if 'issue_hash' in raw:
            ident = ident + str(raw['issue_hash'])
        return ident

    def getReport(self):
        """Return the HTML report path inside the run directory, or " " if none."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return 'file:line:col, category: description' for display."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the underlying diagnostic dictionary itself (not a copy)."""
        return self._data

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

91

92

class multidict:
    """Mapping in which assigning to a key appends to that key's value list.

    elts -- optional iterable of (key, value) pairs used to seed the mapping.
    """

    def __init__(self, elts=()):
        self.data = {}
        for pair in elts:
            key, value = pair
            self[key] = value

    def __getitem__(self, item):
        """Return the list of values stored under item (KeyError if absent)."""
        return self.data[item]

    def __setitem__(self, key, value):
        """Append value to key's list, creating the list on first use."""
        self.data.setdefault(key, []).append(value)

    def items(self):
        """Return the (key, value-list) pairs of the underlying dict."""
        return self.data.items()

    def values(self):
        """Return the value lists of the underlying dict."""
        return self.data.values()

    def keys(self):
        """Return the keys of the underlying dict."""
        return self.data.keys()

    def __len__(self):
        """Return the number of distinct keys."""
        return len(self.data)

    def get(self, key, default=None):
        """Return key's value list, or default when the key is absent."""
        return self.data.get(key, default)

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

115

Anna Zaks

2011-09-12 21:32:41 +0000

[diff] [blame]

116

class CmpOptions:
    """Option holder with the attributes verboseLog, rootA and rootB.

    The values are stored exactly as passed; nothing is validated here.
    """

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA, self.rootB = rootA, rootB
        self.verboseLog = verboseLog

121

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

122

class AnalysisReport:
    """A single report: its owning run, its file table and its diagnostics.

    run   -- the run this report belongs to.
    files -- the report's file table, stored as given.
    """

    def __init__(self, run, files):
        # The diagnostics list starts empty; whoever builds the report fills it.
        self.diagnostics = []
        self.files = files
        self.run = run

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

127

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

128

class AnalysisRun:
    """Accumulates the reports and diagnostics loaded for one analysis run.

    info -- object providing 'path' and 'root' attributes; it is also kept
            as self.info.
    """

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the clang version seen in the loaded plists, or None."""
        return self.clang_version

    def readSingleFile(self, p, deleteEmpty):
        """Load the plist file p and add its report and diagnostics to the run.

        p           -- path of the plist file to read.
        deleteEmpty -- when true, a plist with an empty 'files' entry is
                       removed from disk; such a plist is skipped either way.

        Raises KeyError if the plist lacks 'files' or 'diagnostics', and
        AssertionError if it holds keys this loader does not know about.
        """
        # plistlib.readPlist no longer exists in Python 3.9+; use plistlib.load
        # where it is available and keep readPlist for older interpreters.
        if hasattr(plistlib, 'load'):
            with open(p, 'rb') as plistFile:
                data = plistlib.load(plistFile)
        else:
            data = plistlib.readPlist(p)

        # We want to retrieve the clang version even if there are no
        # reports. Assume that all reports were created using the same
        # clang version (this is always true and is more efficient).
        if 'clang_version' in data:
            version = data.pop('clang_version')
            if self.clang_version is None:
                self.clang_version = version

        # Ignore/delete empty reports.
        if not data['files']:
            if deleteEmpty:
                os.remove(p)
            return

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The emptiness check guards
        # against a report that lists files but carries no diagnostics, which
        # would otherwise fail on the [0] lookup.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(self, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Every known key has been consumed by now; anything left over means
        # the plist has content this loader does not handle.
        assert not data

        report.diagnostics.extend(diagnostics)
        self.reports.append(report)
        self.diagnostics.extend(diagnostics)

180

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

181

Anna Zaks

2012-10-15 22:48:21 +0000

[diff] [blame]

182

# Backward compatibility API.

Anna Zaks

2012-08-02 00:41:40 +0000

[diff] [blame]

183

def loadResults(path, opts, root = "", deleteEmpty=True):
    """Older entry point kept for callers that pass a path and an options object.

    Wraps path, root and opts.verboseLog in a SingleRunInfo and delegates to
    loadResultsFromSingleRun, returning whatever that returns.
    """
    runInfo = SingleRunInfo(path, root, opts.verboseLog)
    return loadResultsFromSingleRun(runInfo, deleteEmpty)

186

187

# Load results of the analyses from a given output folder.

188

# - info is the SingleRunInfo object

189

# - deleteEmpty specifies if the empty plist files should be deleted

190

def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Build an AnalysisRun from the plist output described by info.

    info        -- the SingleRunInfo object; info.path may name a single
                   file or a directory that is walked recursively.
    deleteEmpty -- specifies if the empty plist files should be deleted.

    Returns the populated AnalysisRun.
    """
    target = info.path
    run = AnalysisRun(info)

    # A plain file is read directly; nothing is walked in that case.
    if os.path.isfile(target):
        run.readSingleFile(target, deleteEmpty)
        return run

    for dirpath, _dirnames, filenames in os.walk(target):
        for name in filenames:
            # Only names ending in 'plist' are treated as reports.
            if name.endswith('plist'):
                run.readSingleFile(os.path.join(dirpath, name), deleteEmpty)

    return run

205

Anna Zaks

2012-07-16 20:21:42 +0000

[diff] [blame]

206

def cmpAnalysisDiagnostic(d):
    """Sort key for a diagnostic: the value of its getIssueIdentifier()."""
    sortKey = d.getIssueIdentifier()
    return sortKey

Anna Zaks

d60367b

2012-06-08 01:50:49 +0000

[diff] [blame]

208

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

209

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, then the diagnostics found only in A, then the
    diagnostics found only in B. Neither run's diagnostics list is modified.
    """
    def issueKey(diag):
        return diag.getIssueIdentifier()

    matched = []
    onlyA = []
    onlyB = []

    # Quickly eliminate equal elements: sort both sides by identifier and walk
    # them in lockstep from the largest identifier down.
    sortedA = sorted(A.diagnostics, key=issueKey)
    sortedB = sorted(B.diagnostics, key=issueKey)
    i = len(sortedA) - 1
    j = len(sortedB) - 1
    while i >= 0 and j >= 0:
        a = sortedA[i]
        b = sortedB[j]
        keyA = issueKey(a)
        keyB = issueKey(b)
        if keyA == keyB:
            matched.append((a, b, 0))
            i -= 1
            j -= 1
        elif keyA > keyB:
            # Nothing left in B can equal a; b stays in play.
            onlyA.append(a)
            i -= 1
        else:
            # Nothing left in A can equal b; a stays in play.
            onlyB.append(b)
            j -= 1

    # Whatever one side still holds once the other is exhausted is unmatched.
    onlyA.extend(sortedA[:i + 1])
    onlyB.extend(sortedB[:j + 1])

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    res = matched
    res.extend((a, None, None) for a in onlyA)
    res.extend((None, b, None) for b in onlyB)
    return res

Anna Zaks

2012-07-16 20:21:42 +0000

[diff] [blame]

256

def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between two analysis result locations.

    dirA, dirB  -- the result locations to load and compare.
    opts        -- options object providing rootA, rootB and verboseLog.
    deleteEmpty -- forwarded to loadResults for both locations.

    One ADDED / REMOVED / CHANGED line is printed per difference, followed by
    the totals. When opts.verboseLog is set, a matching record (including the
    report paths) is also written to that file. Returns the number of
    differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given. The log holds text records only.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    def logLine(text):
        # Records go to the verbose log only; stdout output is printed separately.
        if auxLog:
            auxLog.write(text + "\n")

    # The try/finally makes sure the log is closed even if comparing or
    # formatting raises part-way through.
    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                logLine("('ADDED', %r, %r)" % (b.getReadableName(),
                                               b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                logLine("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                 a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                logLine("('CHANGED', %r, %r, %r, %r)"
                        % (a.getReadableName(),
                           b.getReadableName(),
                           a.getReport(),
                           b.getReport()))
            # Pairs matched with confidence 0 are identical: nothing to report.

        # The total counts the diagnostics of the second (B) result set.
        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        logLine("('TOTAL NEW REPORTS', %r)" % TotalReports)
        logLine("('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        if auxLog:
            auxLog.close()

    return foundDiffs

304

Daniel Dunbar

2009-08-06 21:15:33 +0000

[diff] [blame]

305

def main():
    """Command-line entry point: parse the options and diff two result dirs.

    Expects exactly two positional arguments (dir A and dir B); any other
    count is reported through the option parser's error().
    """
    from optparse import OptionParser

    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    # --rootA and --rootB differ only in the letter, so register them in a loop.
    for side in ("A", "B"):
        parser.add_option("", "--root" + side, dest="root" + side,
                          help="Prefix to ignore on source files for directory "
                               + side,
                          action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    opts, args = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dumpScanBuildResultsDiff(args[0], args[1], opts)

Daniel Dunbar