Blame - gm/rebaseline_server/results.py - platform/external/skia

2013-09-27 15:02:44 +0000

[diff] [blame]

1

#!/usr/bin/python

2

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

3

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

4

5

6

Use of this source code is governed by a BSD-style license that can be

7

found in the LICENSE file.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

8

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

9

Repackage expected/actual GM results as needed by our HTML rebaseline viewer.

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

10

"""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

11

12

# System-level imports

13

import fnmatch

14

import json

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

15

import logging

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

16

import os

17

import re

18

import sys

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame]

19

import time

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

20

21

# Imports from within Skia
#
# We need to add the 'gm' directory, so that we can import gm_json.py within
# that directory.  That script allows us to parse the actual-results.json file
# written out by the GM tool.
# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
# so any dirs that are already in the PYTHONPATH will be preferred.
GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if GM_DIRECTORY not in sys.path:
    sys.path.append(GM_DIRECTORY)
# This import must stay below the sys.path adjustment above.
import gm_json

# Used to split an image filename into its (test, config) parts.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
# Inverse operation: build an image filename from (test, config).
IMAGE_FILENAME_FORMATTER = '%s_%s.png'  # pass in (testname, config)

# Keys of each per-test result dictionary whose values are tallied in the
# 'categories' summary.
CATEGORIES_TO_SUMMARIZE = [
    'builder', 'test', 'config', 'resultType',
]

# Valid values for the 'type' argument of Results.get_results_of_type().
RESULTS_ALL = 'all'
RESULTS_FAILURES = 'failures'

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

41

42

class Results(object):

43

""" Loads actual and expected results from all builders, supplying combined

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

44

reports as requested.

45

epoger@google.com

2013-10-23 15:07:26 +0000

[diff] [blame^]

46

Once this object has been constructed, the results (in self._results[])

47

are immutable. If you want to update the results based on updated JSON

48

file contents, you will need to create a new Results object."""

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

49

50

def __init__(self, actuals_root, expected_root):

51

"""

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

52

Args:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

53

actuals_root: root directory containing all actual-results.json files

54

expected_root: root directory containing all expected-results.json files

55

"""

epoger@google.com

2013-10-23 15:07:26 +0000

[diff] [blame^]

56

self._actuals_root = actuals_root

57

self._expected_root = expected_root

58

self._load_actual_and_expected()

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame]

59

self._timestamp = int(time.time())

60

61

def get_timestamp(self):

62

"""Return the time at which this object was created, in seconds past epoch

63

(UTC).

64

"""

65

return self._timestamp

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

66

epoger@google.com

2013-10-23 15:07:26 +0000

[diff] [blame^]

67

def edit_expectations(self, modifications):
    """Edit the expectations stored within this object and write them back
    to disk.

    Note that this will NOT update the results stored in self._results[] ;
    in order to see those updates, you must instantiate a new Results object
    based on the (now updated) files on disk.

    Args:
      modifications: a list of dictionaries, one for each expectation to
          update:

         [
           {
             'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
             'test': 'bigmatrix',
             'config': '8888',
             'expectedHashType': 'bitmap-64bitMD5',
             'expectedHashDigest': '10894408024079689926',
           },
           ...
         ]

    Raises:
      KeyError if a modification names a builder for which no dictionary was
          read from self._expected_root, or lacks one of the keys shown above

    TODO(epoger): For now, this does not allow the caller to set any fields
    other than expectedHashType/expectedHashDigest, and assumes that
    ignore-failure should be set to False.  We need to add support
    for other fields (notes, bugs, etc.) and ignore-failure=True.
    """
    # Always start from what is currently on disk, not from self._results.
    expected_builder_dicts = Results._read_dicts_from_root(self._expected_root)
    for mod in modifications:
        image_name = IMAGE_FILENAME_FORMATTER % (mod['test'], mod['config'])
        # TODO(epoger): assumes a single allowed digest per test
        allowed_digests = [[mod['expectedHashType'],
                            int(mod['expectedHashDigest'])]]
        new_expectations = {
            gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests,
            gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE: False,
        }
        builder_dict = expected_builder_dicts[mod['builder']]
        builder_expectations = builder_dict.get(
            gm_json.JSONKEY_EXPECTEDRESULTS)
        if not builder_expectations:
            # Missing (or empty) expectations section: start a fresh one and
            # attach it, so the assignment below lands in builder_dict.
            builder_expectations = {}
            builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = (
                builder_expectations)
        builder_expectations[image_name] = new_expectations
    # Write everything back in one pass, after all modifications are applied.
    Results._write_dicts_to_root(expected_builder_dicts, self._expected_root)

111

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

112

def get_results_of_type(self, type):

113

"""Return results of some/all tests (depending on 'type' parameter).

114

115

Args:

116

type: string describing which types of results to include; must be one

117

of the RESULTS_* constants

118

119

Results are returned as a dictionary in this form:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

120

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

121

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

122

'categories': # dictionary of categories listed in

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

123

# CATEGORIES_TO_SUMMARIZE, with the number of times

124

# each value appears within its category

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

125

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

126

'resultType': # category name

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

127

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

128

'failed': 29, # category value and total number found of that value

129

'failure-ignored': 948,

130

'no-comparison': 4502,

131

'succeeded': 38609,

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

132

},

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

133

'builder':

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

134

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

135

'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,

136

'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

137

...

138

},

139

... # other categories from CATEGORIES_TO_SUMMARIZE

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

140

}, # end of 'categories' dictionary

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

141

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

142

'testData': # list of test results, with a dictionary for each

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

143

[

144

{

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

145

'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',

146

'test': 'bigmatrix',

147

'config': '8888',

148

'resultType': 'failed',

149

'expectedHashType': 'bitmap-64bitMD5',

150

'expectedHashDigest': '10894408024079689926',

151

'actualHashType': 'bitmap-64bitMD5',

152

'actualHashDigest': '2409857384569',

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

153

},

154

...

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

155

], # end of 'testData' list

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

156

}

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

157

"""

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

158

return self._results[type]

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

159

160

@staticmethod

epoger@google.com

2013-10-23 15:07:26 +0000

[diff] [blame^]

161

def _read_dicts_from_root(root, pattern='*.json'):

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

162

"""Read all JSON dictionaries within a directory tree.

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

163

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

164

Args:

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

165

root: path to root of directory tree

166

pattern: which files to read within root (fnmatch-style pattern)

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

167

168

Returns:

169

A meta-dictionary containing all the JSON dictionaries found within

170

the directory tree, keyed by the builder name of each dictionary.

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame]

171

172

Raises:

173

IOError if root does not refer to an existing directory

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

174

"""

epoger@google.com

2013-10-15 20:10:33 +0000

[diff] [blame]

175

if not os.path.isdir(root):

176

raise IOError('no directory found at path %s' % root)

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

177

meta_dict = {}

178

for dirpath, dirnames, filenames in os.walk(root):

179

for matching_filename in fnmatch.filter(filenames, pattern):

180

builder = os.path.basename(dirpath)

epoger@google.com

2013-10-23 15:07:26 +0000

[diff] [blame^]

181

# If we are reading from the collection of actual results, skip over

182

# the Trybot results (we don't maintain baselines for them).

epoger@google.com

2013-09-27 15:02:44 +0000

[diff] [blame]

183

if builder.endswith('-Trybot'):

184

continue

185

fullpath = os.path.join(dirpath, matching_filename)

186

meta_dict[builder] = gm_json.LoadFromFile(fullpath)

187

return meta_dict

188

epoger@google.com

2013-10-23 15:07:26 +0000

[diff] [blame^]

189

@staticmethod

190

def _write_dicts_to_root(meta_dict, root, pattern='*.json'):

191

"""Write all per-builder dictionaries within meta_dict to files under

192

the root path.

193

194

Security note: this will only write to files that already exist within

195

the root path (as found by os.walk() within root), so we don't need to

196

worry about malformed content writing to disk outside of root.

197

However, the data written to those files is not double-checked, so it

198

could contain poisonous data.

199

200

Args:

201

meta_dict: a builder-keyed meta-dictionary containing all the JSON

202

dictionaries we want to write out

203

root: path to root of directory tree within which to write files

204

pattern: which files to write within root (fnmatch-style pattern)

205

206

Raises:

207

IOError if root does not refer to an existing directory

208

KeyError if the set of per-builder dictionaries written out was

209

different than expected

210

"""

211

if not os.path.isdir(root):

212

raise IOError('no directory found at path %s' % root)

213

actual_builders_written = []

214

for dirpath, dirnames, filenames in os.walk(root):

215

for matching_filename in fnmatch.filter(filenames, pattern):

216

builder = os.path.basename(dirpath)

217

# We should never encounter Trybot *expectations*, but if we are

218

# writing into the actual-results dir, skip the Trybot actuals.

219

# (I don't know why we would ever write into the actual-results dir,

220

# though.)

221

if builder.endswith('-Trybot'):

222

continue

223

per_builder_dict = meta_dict.get(builder)

224

if per_builder_dict:

225

fullpath = os.path.join(dirpath, matching_filename)

226

gm_json.WriteToFile(per_builder_dict, fullpath)

227

actual_builders_written.append(builder)

228

229

# Check: did we write out the set of per-builder dictionaries we

230

# expected to?

231

expected_builders_written = sorted(meta_dict.keys())

232

actual_builders_written.sort()

233

if expected_builders_written != actual_builders_written:

234

raise KeyError(

235

'expected to write dicts for builders %s, but actually wrote them '

236

'for builders %s' % (

237

expected_builders_written, actual_builders_written))

238

239

def _load_actual_and_expected(self):
    """Loads the results of all tests, across all builders (based on the
    files within self._actuals_root and self._expected_root),
    and stores them in self._results.

    self._results ends up with two entries, keyed by RESULTS_ALL and
    RESULTS_FAILURES; each holds a 'categories' summary and a 'testData'
    list (see get_results_of_type() for the full format).
    """
    actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root)
    expected_builder_dicts = Results._read_dicts_from_root(
        self._expected_root)

    # Pre-populate the resultType category so these values are present in
    # the summaries before any results are tallied.
    categories_all = {}
    categories_failures = {}
    Results._ensure_included_in_category_dict(
        categories_all, 'resultType', [
            gm_json.JSONKEY_ACTUALRESULTS_FAILED,
            gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
            gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
            gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,
        ])
    Results._ensure_included_in_category_dict(
        categories_failures, 'resultType', [
            gm_json.JSONKEY_ACTUALRESULTS_FAILED,
            gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
            gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
        ])

    data_all = []
    data_failures = []
    for builder in sorted(actual_builder_dicts.keys()):
        actual_results_for_this_builder = (
            actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
        for result_type in sorted(actual_results_for_this_builder.keys()):
            results_of_this_type = actual_results_for_this_builder[result_type]
            if not results_of_this_type:
                continue
            for image_name in sorted(results_of_this_type.keys()):
                actual_image = results_of_this_type[image_name]
                try:
                    # TODO(epoger): assumes a single allowed digest per test
                    expected_image = (
                        expected_builder_dicts
                        [builder][gm_json.JSONKEY_EXPECTEDRESULTS]
                        [image_name]
                        [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]
                        [0])
                except (KeyError, TypeError, IndexError):
                    # IndexError covers an allowed-digests list that is
                    # present but empty; it is handled the same way as a
                    # missing expectation instead of aborting the whole load.
                    #
                    # There are several cases in which we would expect to find
                    # no expectations for a given test:
                    #
                    # 1. result_type == NOCOMPARISON
                    #   There are no expectations for this test yet!
                    #
                    # 2. ignore-tests.txt
                    #   If a test has been listed in ignore-tests.txt, then
                    #   its status may show as FAILUREIGNORED even if it
                    #   doesn't have any expectations yet.
                    #
                    # 3. alternate rendering mode failures (e.g. serialized)
                    #   In cases like
                    #   https://code.google.com/p/skia/issues/detail?id=1684
                    #   ('tileimagefilter GM test failing in serialized render
                    #   mode'), the gm-actuals will list a failure for the
                    #   alternate rendering mode even though we don't have
                    #   explicit expectations for the test (the implicit
                    #   expectation is that it must render the same in all
                    #   rendering modes).
                    #
                    # Don't log types 1 or 2, because they are common.
                    # Log other types, because they are rare and we should
                    # know about them, but don't throw an exception, because
                    # we need to keep our tools working in the meanwhile!
                    if result_type not in [
                            gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
                            gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED]:
                        logging.warning(
                            'No expectations found for test: %s', {
                                'builder': builder,
                                'image_name': image_name,
                                'result_type': result_type,
                            })
                    expected_image = [None, None]

                # If this test was recently rebaselined, it will remain in
                # the 'failed' set of actuals until all the bots have
                # cycled (although the expectations have indeed been set
                # from the most recent actuals).  Treat these as successes
                # instead of failures.
                #
                # TODO(epoger): Do we need to do something similar in
                # other cases, such as when we have recently marked a test
                # as ignoreFailure but it still shows up in the 'failed'
                # category?  Maybe we should not rely on the result_type
                # categories recorded within the gm_actuals AT ALL, and
                # instead evaluate the result_type ourselves based on what
                # we see in expectations vs actual checksum?
                if expected_image == actual_image:
                    updated_result_type = (
                        gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED)
                else:
                    updated_result_type = result_type

                (test, config) = IMAGE_FILENAME_RE.match(image_name).groups()
                results_for_this_test = {
                    'builder': builder,
                    'test': test,
                    'config': config,
                    'resultType': updated_result_type,
                    'actualHashType': actual_image[0],
                    'actualHashDigest': str(actual_image[1]),
                    'expectedHashType': expected_image[0],
                    'expectedHashDigest': str(expected_image[1]),
                }
                Results._add_to_category_dict(categories_all,
                                              results_for_this_test)
                data_all.append(results_for_this_test)
                if (updated_result_type !=
                        gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED):
                    Results._add_to_category_dict(categories_failures,
                                                  results_for_this_test)
                    data_failures.append(results_for_this_test)

    self._results = {
        RESULTS_ALL:
            {'categories': categories_all, 'testData': data_all},
        RESULTS_FAILURES:
            {'categories': categories_failures, 'testData': data_failures},
    }

epoger@google.com

2013-09-30 15:06:25 +0000

[diff] [blame]

358

359

@staticmethod
def _add_to_category_dict(category_dict, test_results):
    """Add test_results to the category dictionary we are building.
    (See documentation of self.get_results_of_type() for the format of this
    dictionary.)

    Args:
      category_dict: category dict-of-dicts to add to; modify this in-place
      test_results: test data with which to update category_list, in a dict:
         {
           'category_name': 'category_value',
           'category_name': 'category_value',
           ...
         }
    """
    for category in CATEGORIES_TO_SUMMARIZE:
        category_value = test_results.get(category)
        if not category_value:
            continue  # test_results did not include this category, keep going
        value_counts = category_dict.get(category)
        if not value_counts:
            # Missing (or empty) category: start a fresh tally and attach it.
            value_counts = {}
            category_dict[category] = value_counts
        # A missing count and a zero count both start from 0.
        value_counts[category_value] = (
            (value_counts.get(category_value) or 0) + 1)

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

383

384

@staticmethod

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

385

def _ensure_included_in_category_dict(category_dict,

386

category_name, category_values):

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

387

"""Ensure that the category name/value pairs are included in category_dict,

388

even if there aren't any results with that name/value pair.

epoger@google.com

2013-10-11 18:45:33 +0000

[diff] [blame]

389

(See documentation of self.get_results_of_type() for the format of this

390

dictionary.)

epoger@google.com

2013-10-02 18:57:48 +0000

[diff] [blame]

391

epoger@google.com

2013-10-09 18:05:58 +0000

[diff] [blame]

392

Args:

epoger@google.com