Blame - gm/rebaseline_server/results.py - platform/external/skia

2013-09-27 15:02:44 +0000

[diff] [blame]

1

#!/usr/bin/python

2

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame^]

3

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

4

5

6

Use of this source code is governed by a BSD-style license that can be

7

found in the LICENSE file.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

8

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

9

Repackage expected/actual GM results as needed by our HTML rebaseline viewer.

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame^]

10

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

11

12

# System-level imports

import fnmatch

import json

import os

import re

import sys

# Imports from within Skia

20

#

21

# We need to add the 'gm' directory, so that we can import gm_json.py within

22

# that directory. That script allows us to parse the actual-results.json file

23

# written out by the GM tool.

24

# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*

25

# so any dirs that are already in the PYTHONPATH will be preferred.

26

# Directory two levels up from this file's resolved (symlink-free) path.
GM_DIRECTORY = os.path.dirname(
    os.path.dirname(os.path.realpath(__file__)))
# Append (rather than prepend) so existing sys.path entries keep priority.
if GM_DIRECTORY not in sys.path:
    sys.path.append(GM_DIRECTORY)
import gm_json

# Compiled matcher for image filenames, built from the pattern that
# gm_json publishes.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

# Keys of each per-test result dictionary whose values get tallied in the
# 'categories' summary.
CATEGORIES_TO_SUMMARIZE = ['builder', 'test', 'config', 'resultType']

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

35

36

class Results(object):

37

""" Loads actual and expected results from all builders, supplying combined

38

reports as requested. """

39

40

def __init__(self, actuals_root, expected_root):
    """Load actual and expected results and precompute the combined report.

    Args:
      actuals_root: root directory containing all actual-results.json files
      expected_root: root directory containing all expected-results.json
          files
    """
    # Read both directory trees first; each yields a dict keyed by builder.
    self._actual_builder_dicts = Results._GetDictsFromRoot(actuals_root)
    self._expected_builder_dicts = Results._GetDictsFromRoot(expected_root)

    # Merge them once, up front, so GetAll() can simply hand back the result.
    combined_results = Results._Combine(
        actual_builder_dicts=self._actual_builder_dicts,
        expected_builder_dicts=self._expected_builder_dicts)
    self._all_results = combined_results

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

51

52

def GetAll(self):
    """Return the combined results that were computed at construction time.

    The returned dictionary has two top-level entries:

    {
      'categories':  # one entry per category in CATEGORIES_TO_SUMMARIZE;
                     # each maps a category value to the number of test
                     # results carrying that value
        {
          'resultType':  # category name
            {
              'failed': 29,  # category value: number of results with it
              'failure-ignored': 948,
              'no-comparison': 4502,
              'succeeded': 38609,
            },
          'builder':
            {
              'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,
              'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,
              ...
            },
          ...  # other categories from CATEGORIES_TO_SUMMARIZE
        },

      'testData':  # list with one dictionary per individual test result
        [
          {
            'index': 0,  # position of this result within the testData list
            'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
            'test': 'bigmatrix',
            'config': '8888',
            'resultType': 'failed',
            'expectedHashType': 'bitmap-64bitMD5',
            'expectedHashDigest': '10894408024079689926',
            'actualHashType': 'bitmap-64bitMD5',
            'actualHashDigest': '2409857384569',
          },
          ...
        ],
    }

    The same object is returned on every call (it is not copied).
    """
    return self._all_results

94

95

@staticmethod
def _GetDictsFromRoot(root, pattern='*.json'):
    """Collect every JSON dictionary found beneath a directory tree.

    Args:
      root: path to root of directory tree
      pattern: which files to read within root (fnmatch-style pattern)

    Returns:
      A meta-dictionary mapping builder name -> dictionary loaded from the
      matching file. The builder name is the basename of the directory that
      holds the file. Directories whose name ends in '-Trybot' are skipped.
      If several matching files map to the same builder name, the one
      visited last replaces the earlier ones.
    """
    loaded_by_builder = {}
    for directory, _, files in os.walk(root):
        # Same for every file in this directory, so compute it only once.
        builder_name = os.path.basename(directory)
        for json_filename in fnmatch.filter(files, pattern):
            if not builder_name.endswith('-Trybot'):
                json_path = os.path.join(directory, json_filename)
                loaded_by_builder[builder_name] = (
                    gm_json.LoadFromFile(json_path))
    return loaded_by_builder

116

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

117

@staticmethod
def _Combine(actual_builder_dicts, expected_builder_dicts):
    """Gathers the results of all tests, across all builders (based on the
    contents of actual_builder_dicts and expected_builder_dicts).

    This is a static method, because once we start refreshing results
    asynchronously, we need to make sure we are not corrupting the object's
    member variables.

    Args:
      actual_builder_dicts: a meta-dictionary of all actual JSON results,
          as returned by _GetDictsFromRoot().
      expected_builder_dicts: a meta-dictionary of all expected JSON results,
          as returned by _GetDictsFromRoot().

    Returns:
      A dictionary with 'categories' and 'testData' entries, in the same
      form returned by self.GetAll().
    """
    test_data = []
    category_dict = {}
    # Make sure every known result type shows up in the summary, even if no
    # test ended up with that result type.
    Results._EnsureIncludedInCategoryDict(category_dict, 'resultType', [
        gm_json.JSONKEY_ACTUALRESULTS_FAILED,
        gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
        gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
        gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,
    ])

    # Iterate in sorted order so the 'index' values are reproducible.
    for builder in sorted(actual_builder_dicts.keys()):
        actual_results_for_this_builder = (
            actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
        for result_type in sorted(actual_results_for_this_builder.keys()):
            results_of_this_type = actual_results_for_this_builder[result_type]
            if not results_of_this_type:
                continue
            for image_name in sorted(results_of_this_type.keys()):
                actual_image = results_of_this_type[image_name]
                try:
                    # TODO(epoger): assumes a single allowed digest per test
                    expected_image = (
                        expected_builder_dicts
                            [builder][gm_json.JSONKEY_EXPECTEDRESULTS]
                            [image_name]
                            [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]
                            [0])
                except (KeyError, TypeError, IndexError):
                    # There are several cases in which we would expect to find
                    # no expectations for a given test:
                    #
                    # 1. result_type == NOCOMPARISON
                    #   There are no expectations for this test yet!
                    #
                    # 2. ignore-tests.txt
                    #   If a test has been listed in ignore-tests.txt, then
                    #   its status may show as FAILUREIGNORED even if it
                    #   doesn't have any expectations yet.
                    #
                    # 3. alternate rendering mode failures (e.g. serialized)
                    #   In cases like
                    #   https://code.google.com/p/skia/issues/detail?id=1684
                    #   ('tileimagefilter GM test failing in serialized render
                    #   mode'), the gm-actuals will list a failure for the
                    #   alternate rendering mode even though we don't have
                    #   explicit expectations for the test (the implicit
                    #   expectation is that it must render the same in all
                    #   rendering modes).
                    #
                    # (IndexError covers an allowed-digests list that is
                    # present but empty; treat it as "no expectations" too.)
                    #
                    # Don't log types 1 or 2, because they are common.
                    # Log other types, because they are rare and we should
                    # know about them, but don't throw an exception, because
                    # we need to keep our tools working in the meanwhile!
                    if result_type not in [
                            gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
                            gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED]:
                        # Call form of print: valid on Python 2 and Python 3.
                        print('WARNING: No expectations found for test: %s' % {
                            'builder': builder,
                            'image_name': image_name,
                            'result_type': result_type,
                        })
                    expected_image = [None, None]

                # If this test was recently rebaselined, it will remain in
                # the 'failed' set of actuals until all the bots have
                # cycled (although the expectations have indeed been set
                # from the most recent actuals).  Treat these as successes
                # instead of failures.
                #
                # TODO(epoger): Do we need to do something similar in
                # other cases, such as when we have recently marked a test
                # as ignoreFailure but it still shows up in the 'failed'
                # category?  Maybe we should not rely on the result_type
                # categories recorded within the gm_actuals AT ALL, and
                # instead evaluate the result_type ourselves based on what
                # we see in expectations vs actual checksum?
                if expected_image == actual_image:
                    updated_result_type = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED
                else:
                    updated_result_type = result_type

                # Split the image filename into its test and config parts.
                (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()
                results_for_this_test = {
                    'index': len(test_data),
                    'builder': builder,
                    'test': test,
                    'config': config,
                    'resultType': updated_result_type,
                    'actualHashType': actual_image[0],
                    'actualHashDigest': str(actual_image[1]),
                    'expectedHashType': expected_image[0],
                    'expectedHashDigest': str(expected_image[1]),
                }
                Results._AddToCategoryDict(category_dict, results_for_this_test)
                test_data.append(results_for_this_test)
    return {'categories': category_dict, 'testData': test_data}

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

229

230

@staticmethod
def _AddToCategoryDict(category_dict, test_results):
    """Tally one test result into the category summary being built.
    (See documentation of self.GetAll() for the format of this dictionary.)

    Args:
      category_dict: category dict-of-dicts to add to; modified in-place
      test_results: test data with which to update category_dict, in a dict:
          {
            'category_name': 'category_value',
            'category_name': 'category_value',
            ...
          }
    """
    for category in CATEGORIES_TO_SUMMARIZE:
        category_value = test_results.get(category)
        if category_value:
            # Start a fresh per-category table if none (or an empty one)
            # exists yet.
            counts = category_dict.get(category)
            if not counts:
                counts = {}
                category_dict[category] = counts
            # A missing or zero count is treated as zero before incrementing.
            counts[category_value] = (counts.get(category_value) or 0) + 1
        # else: test_results did not include this category, keep going

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

253

254

@staticmethod

255

def _EnsureIncludedInCategoryDict(category_dict,

256

category_name, category_values):

257

"""Ensure that the category name/value pairs are included in category_dict,

258

even if there aren't any results with that name/value pair.

259

(See documentation of self.GetAll() for the format of this dictionary.)

260

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame^]

261

Args:

epoger@google.com