Blame - gm/rebaseline_server/results.py - platform/external/skia

2013-09-27 15:02:44 +0000

[diff] [blame]

1

#!/usr/bin/python

2

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

3

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

4

5

6

Use of this source code is governed by a BSD-style license that can be

7

found in the LICENSE file.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

8

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

9

Repackage expected/actual GM results as needed by our HTML rebaseline viewer.

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

10

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

11

12

# System-level imports

13

import fnmatch

14

import json

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

15

import logging

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

import os

import re

import sys

# Imports from within Skia
#
# gm_json.py lives two directory levels above this file's real path (the 'gm'
# directory); that script allows us to parse the actual-results.json file
# written out by the GM tool.
# Put the 'gm' dir on the PYTHONPATH if it is not there yet, but add it at the
# *end* so that any dirs already in the PYTHONPATH are preferred.
GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if GM_DIRECTORY not in sys.path:
  sys.path.append(GM_DIRECTORY)
import gm_json

# Compiled form of gm_json's image filename pattern; used to split an image
# name into its (test, config) parts.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

# Keys of each per-test result dictionary whose values get counted in the
# 'categories' summary.
CATEGORIES_TO_SUMMARIZE = ['builder', 'test', 'config', 'resultType']

# Valid values for the 'type' argument of Results.get_results_of_type().
RESULTS_ALL = 'all'
RESULTS_FAILURES = 'failures'

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

38

39

class Results(object):

40

""" Loads actual and expected results from all builders, supplying combined

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

41

reports as requested.

42

43

Once this object has been constructed, the results are immutable. If you

44

want to update the results based on updated JSON file contents, you will

45

need to create a new Results object."""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

46

47

def __init__(self, actuals_root, expected_root):
  """Load actual and expected results from disk and combine them.

  Args:
    actuals_root: root directory containing all actual-results.json files
    expected_root: root directory containing all expected-results.json files
  """
  load_builder_dicts = Results._get_dicts_from_root
  self._actual_builder_dicts = load_builder_dicts(actuals_root)
  self._expected_builder_dicts = load_builder_dicts(expected_root)
  # Builds self._results from the two meta-dictionaries loaded above.
  self._combine_actual_and_expected()

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

56

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

57

def get_results_of_type(self, type):

58

"""Return results of some/all tests (depending on 'type' parameter).

59

60

Args:

61

type: string describing which types of results to include; must be one

62

of the RESULTS_* constants

63

64

Results are returned as a dictionary in this form:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

65

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

66

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

67

'categories': # dictionary of categories listed in

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

68

# CATEGORIES_TO_SUMMARIZE, with the number of times

69

# each value appears within its category

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

70

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

71

'resultType': # category name

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

72

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

73

'failed': 29, # category value and total number found of that value

74

'failure-ignored': 948,

75

'no-comparison': 4502,

76

'succeeded': 38609,

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

77

},

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

78

'builder':

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

79

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

80

'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,

81

'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

82

...

83

},

84

... # other categories from CATEGORIES_TO_SUMMARIZE

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

85

}, # end of 'categories' dictionary

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

86

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

87

'testData': # list of test results, with a dictionary for each

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

88

[

89

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

90

'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',

91

'test': 'bigmatrix',

92

'config': '8888',

93

'resultType': 'failed',

94

'expectedHashType': 'bitmap-64bitMD5',

95

'expectedHashDigest': '10894408024079689926',

96

'actualHashType': 'bitmap-64bitMD5',

97

'actualHashDigest': '2409857384569',

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

98

},

99

...

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

100

], # end of 'testData' list

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

101

}

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

102

"""

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

103

return self._results[type]

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

104

105

@staticmethod

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

106

def _get_dicts_from_root(root, pattern='*.json'):

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

107

"""Read all JSON dictionaries within a directory tree.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

108

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

109

Args:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

110

root: path to root of directory tree

111

pattern: which files to read within root (fnmatch-style pattern)

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

112

113

Returns:

114

A meta-dictionary containing all the JSON dictionaries found within

115

the directory tree, keyed by the builder name of each dictionary.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

116

"""

117

meta_dict = {}

118

for dirpath, dirnames, filenames in os.walk(root):

119

for matching_filename in fnmatch.filter(filenames, pattern):

120

builder = os.path.basename(dirpath)

121

if builder.endswith('-Trybot'):

122

continue

123

fullpath = os.path.join(dirpath, matching_filename)

124

meta_dict[builder] = gm_json.LoadFromFile(fullpath)

125

return meta_dict

126

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

127

def _combine_actual_and_expected(self):
  """Gathers the results of all tests, across all builders (based on the
  contents of self._actual_builder_dicts and self._expected_builder_dicts),
  and stores them in self._results.

  self._results is keyed by RESULTS_ALL and RESULTS_FAILURES; each value is a
  dictionary with a 'categories' summary and a 'testData' list. (See
  documentation of self.get_results_of_type() for the format.) A test is
  included in the RESULTS_FAILURES data unless its (possibly updated) result
  type is gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED.
  """
  failure_result_types = [
      gm_json.JSONKEY_ACTUALRESULTS_FAILED,
      gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
      gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
  ]
  categories_all = {}
  categories_failures = {}
  # Make sure these resultType values are listed in the category summaries
  # even if no test ends up with that result.
  Results._ensure_included_in_category_dict(
      categories_all, 'resultType',
      failure_result_types + [gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED])
  Results._ensure_included_in_category_dict(
      categories_failures, 'resultType', list(failure_result_types))

  data_all = []
  data_failures = []
  for builder in sorted(self._actual_builder_dicts):
    actual_results_for_this_builder = (
        self._actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
    for result_type in sorted(actual_results_for_this_builder):
      results_of_this_type = actual_results_for_this_builder[result_type]
      if not results_of_this_type:
        continue
      for image_name in sorted(results_of_this_type):
        actual_image = results_of_this_type[image_name]
        try:
          # TODO(epoger): assumes a single allowed digest per test
          expected_image = (
              self._expected_builder_dicts
                  [builder][gm_json.JSONKEY_EXPECTEDRESULTS]
                  [image_name][gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]
                  [0])
        except (KeyError, TypeError, IndexError):
          # There are several cases in which we would expect to find
          # no expectations for a given test:
          #
          # 1. result_type == NOCOMPARISON
          #   There are no expectations for this test yet!
          #
          # 2. ignore-tests.txt
          #   If a test has been listed in ignore-tests.txt, then its status
          #   may show as FAILUREIGNORED even if it doesn't have any
          #   expectations yet.
          #
          # 3. alternate rendering mode failures (e.g. serialized)
          #   In cases like
          #   https://code.google.com/p/skia/issues/detail?id=1684
          #   ('tileimagefilter GM test failing in serialized render mode'),
          #   the gm-actuals will list a failure for the alternate
          #   rendering mode even though we don't have explicit expectations
          #   for the test (the implicit expectation is that it must
          #   render the same in all rendering modes).
          #
          # IndexError is caught as well, so that an expectations entry
          # whose allowed-digests list is empty is treated as "no
          # expectations" instead of aborting the whole run.
          #
          # Don't log types 1 or 2, because they are common.
          # Log other types, because they are rare and we should know about
          # them, but don't throw an exception, because we need to keep our
          # tools working in the meanwhile!
          if result_type not in [
              gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
              gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED]:
            logging.warning('No expectations found for test: %s' % {
                'builder': builder,
                'image_name': image_name,
                'result_type': result_type,
                })
          # Placeholder (hash type, hash digest) pair meaning "nothing
          # expected".
          expected_image = [None, None]

        # If this test was recently rebaselined, it will remain in
        # the 'failed' set of actuals until all the bots have
        # cycled (although the expectations have indeed been set
        # from the most recent actuals).  Treat these as successes
        # instead of failures.
        #
        # TODO(epoger): Do we need to do something similar in
        # other cases, such as when we have recently marked a test
        # as ignoreFailure but it still shows up in the 'failed'
        # category?  Maybe we should not rely on the result_type
        # categories recorded within the gm_actuals AT ALL, and
        # instead evaluate the result_type ourselves based on what
        # we see in expectations vs actual checksum?
        if expected_image == actual_image:
          updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED
        else:
          updated_result_type = result_type

        (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()
        results_for_this_test = {
            'builder': builder,
            'test': test,
            'config': config,
            'resultType': updated_result_type,
            'actualHashType': actual_image[0],
            'actualHashDigest': str(actual_image[1]),
            'expectedHashType': expected_image[0],
            'expectedHashDigest': str(expected_image[1]),
        }
        Results._add_to_category_dict(categories_all, results_for_this_test)
        data_all.append(results_for_this_test)
        if updated_result_type != gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED:
          Results._add_to_category_dict(categories_failures,
                                        results_for_this_test)
          data_failures.append(results_for_this_test)

  self._results = {
      RESULTS_ALL:
          {'categories': categories_all, 'testData': data_all},
      RESULTS_FAILURES:
          {'categories': categories_failures, 'testData': data_failures},
  }

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

243

244

@staticmethod
def _add_to_category_dict(category_dict, test_results):
  """Count one test's category values in the category dictionary being built.
  (See documentation of self.get_results_of_type() for the format of this
  dictionary.)

  Args:
    category_dict: category dict-of-dicts to add to; modify this in-place
    test_results: test data with which to update category_dict, in a dict:
        {
          'category_name': 'category_value',
          'category_name': 'category_value',
          ...
        }
  """
  for category in CATEGORIES_TO_SUMMARIZE:
    category_value = test_results.get(category)
    if not category_value:
      continue  # test_results did not include this category, keep going
    value_counts = category_dict.get(category)
    if not value_counts:
      # Missing (or empty) entry for this category: start a fresh one.
      value_counts = category_dict[category] = {}
    # A missing or falsy count is treated as zero before incrementing.
    value_counts[category_value] = (value_counts.get(category_value) or 0) + 1

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

268

269

@staticmethod

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

270

def _ensure_included_in_category_dict(category_dict,

271

category_name, category_values):

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

272

"""Ensure that the category name/value pairs are included in category_dict,

273

even if there aren't any results with that name/value pair.

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

274

(See documentation of self.get_results_of_type() for the format of this

275

dictionary.)

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

276

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

277

Args:

epoger@google.com