Blame - gm/rebaseline_server/results.py - platform/external/skia

2013-09-27 15:02:44 +0000

[diff] [blame]

1

#!/usr/bin/python

2

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

3

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

4

5

6

Use of this source code is governed by a BSD-style license that can be

7

found in the LICENSE file.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

8

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

9

Repackage expected/actual GM results as needed by our HTML rebaseline viewer.

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

10

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

11

12

# System-level imports

13

import fnmatch

14

import json

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

15

import logging

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

16

import os

17

import re

18

import sys

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame^]

19

import time

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

20

21

# Imports from within Skia

22

#

23

# We need to add the 'gm' directory, so that we can import gm_json.py within

24

# that directory. That script allows us to parse the actual-results.json file

25

# written out by the GM tool.

26

# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*

27

# so any dirs that are already in the PYTHONPATH will be preferred.

28

# Parent of the directory holding this file; gm_json.py is imported from it.
GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if GM_DIRECTORY not in sys.path:
    # Appended (not prepended) so dirs already on the path take precedence.
    sys.path.append(GM_DIRECTORY)
import gm_json

# Splits an image filename into its (test, config) parts.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

# Keys of each per-test result dict whose values are tallied in the
# 'categories' summary.
CATEGORIES_TO_SUMMARIZE = [
    'builder', 'test', 'config', 'resultType',
]

# Valid values for the 'type' argument of Results.get_results_of_type().
RESULTS_ALL = 'all'
RESULTS_FAILURES = 'failures'

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

39

40

class Results(object):

41

""" Loads actual and expected results from all builders, supplying combined

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

42

reports as requested.

43

44

Once this object has been constructed, the results are immutable. If you

45

want to update the results based on updated JSON file contents, you will

46

need to create a new Results object."""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

47

48

def __init__(self, actuals_root, expected_root):

49

"""

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

50

Args:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

51

actuals_root: root directory containing all actual-results.json files

52

expected_root: root directory containing all expected-results.json files

53

"""

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

54

self._actual_builder_dicts = Results._get_dicts_from_root(actuals_root)

55

self._expected_builder_dicts = Results._get_dicts_from_root(expected_root)

56

self._combine_actual_and_expected()

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame^]

57

self._timestamp = int(time.time())

58

59

def get_timestamp(self):

60

"""Return the time at which this object was created, in seconds past epoch

61

(UTC).

62

"""

63

return self._timestamp

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

64

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

65

def get_results_of_type(self, type):

66

"""Return results of some/all tests (depending on 'type' parameter).

67

68

Args:

69

type: string describing which types of results to include; must be one

70

of the RESULTS_* constants

71

72

Results are returned as a dictionary in this form:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

73

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

74

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

75

'categories': # dictionary of categories listed in

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

76

# CATEGORIES_TO_SUMMARIZE, with the number of times

77

# each value appears within its category

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

78

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

79

'resultType': # category name

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

80

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

81

'failed': 29, # category value and total number found of that value

82

'failure-ignored': 948,

83

'no-comparison': 4502,

84

'succeeded': 38609,

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

85

},

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

86

'builder':

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

87

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

88

'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,

89

'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

90

...

91

},

92

... # other categories from CATEGORIES_TO_SUMMARIZE

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

93

}, # end of 'categories' dictionary

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

94

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

95

'testData': # list of test results, with a dictionary for each

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

96

[

97

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

98

'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',

99

'test': 'bigmatrix',

100

'config': '8888',

101

'resultType': 'failed',

102

'expectedHashType': 'bitmap-64bitMD5',

103

'expectedHashDigest': '10894408024079689926',

104

'actualHashType': 'bitmap-64bitMD5',

105

'actualHashDigest': '2409857384569',

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

106

},

107

...

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

108

], # end of 'testData' list

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

109

}

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

110

"""

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

111

return self._results[type]

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

112

113

@staticmethod

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

114

def _get_dicts_from_root(root, pattern='*.json'):

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

115

"""Read all JSON dictionaries within a directory tree.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

116

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

117

Args:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

118

root: path to root of directory tree

119

pattern: which files to read within root (fnmatch-style pattern)

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

120

121

Returns:

122

A meta-dictionary containing all the JSON dictionaries found within

123

the directory tree, keyed by the builder name of each dictionary.

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame^]

124

125

Raises:

126

IOError if root does not refer to an existing directory

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

127

"""

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame^]

128

if not os.path.isdir(root):

129

raise IOError('no directory found at path %s' % root)

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

130

meta_dict = {}

131

for dirpath, dirnames, filenames in os.walk(root):

132

for matching_filename in fnmatch.filter(filenames, pattern):

133

builder = os.path.basename(dirpath)

134

if builder.endswith('-Trybot'):

135

continue

136

fullpath = os.path.join(dirpath, matching_filename)

137

meta_dict[builder] = gm_json.LoadFromFile(fullpath)

138

return meta_dict

139

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

140

def _combine_actual_and_expected(self):
    """Gathers the results of all tests, across all builders (based on the
    contents of self._actual_builder_dicts and self._expected_builder_dicts),
    and stores them in self._results.

    self._results ends up with two entries, keyed by RESULTS_ALL and
    RESULTS_FAILURES; each holds a 'categories' summary and a 'testData' list
    (see get_results_of_type() for the format).
    """
    categories_all = {}
    categories_failures = {}
    # Pre-register every resultType value so that it appears in the summary
    # even when no test ends up with that value.
    Results._ensure_included_in_category_dict(categories_all,
                                              'resultType', [
        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
        gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,
        ])
    Results._ensure_included_in_category_dict(categories_failures,
                                              'resultType', [
        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
        ])

    data_all = []
    data_failures = []
    for builder in sorted(self._actual_builder_dicts.keys()):
        actual_results_for_this_builder = (
            self._actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
        for result_type in sorted(actual_results_for_this_builder.keys()):
            results_of_this_type = actual_results_for_this_builder[result_type]
            if not results_of_this_type:
                continue
            for image_name in sorted(results_of_this_type.keys()):
                actual_image = results_of_this_type[image_name]
                try:
                    # TODO(epoger): assumes a single allowed digest per test
                    expected_image = (
                        self._expected_builder_dicts
                            [builder][gm_json.JSONKEY_EXPECTEDRESULTS]
                            [image_name][gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]
                            [0])
                except (KeyError, TypeError, IndexError):
                    # IndexError covers an expectations entry whose list of
                    # allowed digests is empty; it is handled like any other
                    # missing expectation.
                    #
                    # There are several cases in which we would expect to find
                    # no expectations for a given test:
                    #
                    # 1. result_type == NOCOMPARISON
                    #   There are no expectations for this test yet!
                    #
                    # 2. ignore-tests.txt
                    #   If a test has been listed in ignore-tests.txt, then its status
                    #   may show as FAILUREIGNORED even if it doesn't have any
                    #   expectations yet.
                    #
                    # 3. alternate rendering mode failures (e.g. serialized)
                    #   In cases like
                    #   https://code.google.com/p/skia/issues/detail?id=1684
                    #   ('tileimagefilter GM test failing in serialized render mode'),
                    #   the gm-actuals will list a failure for the alternate
                    #   rendering mode even though we don't have explicit expectations
                    #   for the test (the implicit expectation is that it must
                    #   render the same in all rendering modes).
                    #
                    # Don't log types 1 or 2, because they are common.
                    # Log other types, because they are rare and we should know about
                    # them, but don't throw an exception, because we need to keep our
                    # tools working in the meanwhile!
                    if result_type not in [
                        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
                        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED]:
                        # Lazy %-args: the message is only formatted if the
                        # warning is actually emitted.
                        logging.warning('No expectations found for test: %s', {
                            'builder': builder,
                            'image_name': image_name,
                            'result_type': result_type,
                            })
                    expected_image = [None, None]

                # If this test was recently rebaselined, it will remain in
                # the 'failed' set of actuals until all the bots have
                # cycled (although the expectations have indeed been set
                # from the most recent actuals).  Treat these as successes
                # instead of failures.
                #
                # TODO(epoger): Do we need to do something similar in
                # other cases, such as when we have recently marked a test
                # as ignoreFailure but it still shows up in the 'failed'
                # category?  Maybe we should not rely on the result_type
                # categories recorded within the gm_actuals AT ALL, and
                # instead evaluate the result_type ourselves based on what
                # we see in expectations vs actual checksum?
                if expected_image == actual_image:
                    updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED
                else:
                    updated_result_type = result_type

                (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()
                results_for_this_test = {
                    'builder': builder,
                    'test': test,
                    'config': config,
                    'resultType': updated_result_type,
                    'actualHashType': actual_image[0],
                    'actualHashDigest': str(actual_image[1]),
                    'expectedHashType': expected_image[0],
                    'expectedHashDigest': str(expected_image[1]),
                }
                Results._add_to_category_dict(categories_all, results_for_this_test)
                data_all.append(results_for_this_test)
                # Everything that did not end up as a success is also
                # reported in the failures view.
                if updated_result_type != gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:
                    Results._add_to_category_dict(categories_failures,
                                                  results_for_this_test)
                    data_failures.append(results_for_this_test)

    self._results = {
        RESULTS_ALL:
            {'categories': categories_all, 'testData': data_all},
        RESULTS_FAILURES:
            {'categories': categories_failures, 'testData': data_failures},
    }

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

256

257

@staticmethod
def _add_to_category_dict(category_dict, test_results):
    """Add test_results to the category dictionary we are building.
    (See documentation of self.get_results_of_type() for the format of this
    dictionary.)

    Args:
      category_dict: category dict-of-dicts to add to; modify this in-place
      test_results: test data with which to update category_dict, in a dict:
         {
           'category_name': 'category_value',
           'category_name': 'category_value',
           ...
         }
    """
    for category in CATEGORIES_TO_SUMMARIZE:
        category_value = test_results.get(category)
        if not category_value:
            continue  # test_results did not include this category, keep going
        # Fetch (or create) the per-value counters for this category once,
        # instead of repeating the nested lookup for every step.
        value_counts = category_dict.get(category)
        if not value_counts:
            value_counts = {}
            category_dict[category] = value_counts
        # A missing (or falsy) counter starts from zero.
        value_counts[category_value] = (
            (value_counts.get(category_value) or 0) + 1)

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

281

282

@staticmethod

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

283

def _ensure_included_in_category_dict(category_dict,

284

category_name, category_values):

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

285

"""Ensure that the category name/value pairs are included in category_dict,

286

even if there aren't any results with that name/value pair.

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

287

(See documentation of self.get_results_of_type() for the format of this

288

dictionary.)

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

289

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

290

Args:

epoger@google.com