Blame - tools/run_tests/run_performance_tests.py - platform/external/grpc-grpc

2016-03-28 09:32:20 -0700

[diff] [blame]

1

#!/usr/bin/env python2.7

#

# Redistribution and use in source and binary forms, with or without

6

# modification, are permitted provided that the following conditions are

7

# met:

8

#

9

# * Redistributions of source code must retain the above copyright

10

# notice, this list of conditions and the following disclaimer.

11

# * Redistributions in binary form must reproduce the above

12

# copyright notice, this list of conditions and the following disclaimer

13

# in the documentation and/or other materials provided with the

14

# distribution.

15

# * Neither the name of Google Inc. nor the names of its

16

# contributors may be used to endorse or promote products derived from

17

# this software without specific prior written permission.

18

#

19

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

20

# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

21

# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

22

# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

23

# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

24

# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

25

# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

26

# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

27

# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

28

# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

29

# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

30

31

"""Run performance tests locally or remotely."""

32

siddharthshukla

2016-07-07 16:08:01 +0200

[diff] [blame]

33

from __future__ import print_function

34

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

35

import argparse

Craig Tiller

accf16b

2016-09-15 09:08:32 -0700

[diff] [blame]

36

import collections

Jan Tattermusch

2016-03-30 18:04:01 -0700

[diff] [blame]

37

import itertools

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

38

import jobset

Craig Tiller

2016-03-03 12:51:53 -0800

[diff] [blame]

39

import json

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

40

import multiprocessing

41

import os

Craig Tiller

accf16b

2016-09-15 09:08:32 -0700

[diff] [blame]

42

import performance.scenario_config as scenario_config

Craig Tiller

2016-03-03 12:51:53 -0800

[diff] [blame]

43

import pipes

Jan Tattermusch

2016-04-14 08:00:35 -0700

[diff] [blame]

44

import re

Jan Tattermusch

2016-10-24 21:06:40 +0200

[diff] [blame]

45

import report_utils

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

import subprocess

import sys

import tempfile

import time

Jan Tattermusch

ee9032c

2016-04-14 08:35:51 -0700

[diff] [blame]

50

import traceback

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

51

import uuid

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

52

import report_utils

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

53

54

55

# Directory two levels above the directory of the invoked script (sys.argv[0]).
_ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))

# All relative paths used below (tools/..., bins/..., ../grpc) are resolved
# against _ROOT, so switch the working directory at import time.
os.chdir(_ROOT)

# User name used when building "user@host" targets for ssh commands.
_REMOTE_HOST_USERNAME = 'jenkins'

60

61

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

62

class QpsWorkerJob:
    """Encapsulates a qps worker server job.

    Attributes:
      language: the language object this worker was created for.
      host_and_port: "host:port" string identifying the worker.
      perf_file_base_name: base name for perf output files, or None when
        the worker is not wrapped in a perf command.
    """

    def __init__(self, spec, language, host_and_port, perf_file_base_name=None):
        self._spec = spec
        self.language = language
        self.host_and_port = host_and_port
        # No underlying jobset.Job exists until start() is called.
        self._job = None
        self.perf_file_base_name = perf_file_base_name

    def start(self):
        """Creates the underlying jobset.Job from the stored job spec."""
        self._job = jobset.Job(self._spec, newline_on_success=True,
                               travis=True, add_env={})

    def is_running(self):
        """Polls a job and returns True if given job is still running.

        Always returns a real bool: False (not None) when the job has not
        been started yet or has already been killed.
        """
        if self._job is None:
            return False
        return self._job.state() == jobset._RUNNING

    def kill(self):
        """Kills the underlying job, if any, and forgets it."""
        if self._job is not None:
            self._job.kill()
            self._job = None

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

83

84

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

85

def create_qpsworker_job(language, shortname=None, port=10000, remote_host=None, perf_cmd=None):
    """Builds a (not yet started) QpsWorkerJob for one worker process.

    Args:
      language: object providing worker_cmdline(), the worker's argv list.
      shortname: job short name; also used in the perf output base name.
      port: driver port passed to the worker via --driver_port.
      remote_host: if set, the worker command is run on that host over ssh;
        otherwise it is run locally.
      perf_cmd: optional argv prefix (list) to wrap the worker command in;
        an '-o <base>-perf.data' output option is inserted after it.

    Returns:
      A QpsWorkerJob wrapping the constructed job spec.
    """
    worker_argv = language.worker_cmdline() + ['--driver_port=%s' % port]
    host_and_port = '%s:%s' % (remote_host if remote_host else 'localhost', port)

    perf_file_base_name = None
    if perf_cmd:
        perf_file_base_name = '%s-%s' % (host_and_port, shortname)
        # specify -o output file so perf.data gets collected when worker stopped
        perf_output_opts = ['-o', '%s-perf.data' % perf_file_base_name]
        worker_argv = perf_cmd + perf_output_opts + worker_argv

    if remote_host:
        # Run the whole worker command line as one remote shell command.
        ssh_target = str('%s@%s' % (_REMOTE_HOST_USERNAME, remote_host))
        remote_command = 'cd ~/performance_workspace/grpc/ && %s' % ' '.join(worker_argv)
        worker_argv = ['ssh', ssh_target, remote_command]

    worker_spec = jobset.JobSpec(
        cmdline=worker_argv,
        shortname=shortname,
        timeout_seconds=5*60,  # workers get restarted after each scenario
        verbose_success=True)
    return QpsWorkerJob(worker_spec, language, host_and_port, perf_file_base_name)

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

111

112

Jan Tattermusch

2016-04-14 17:42:54 -0700

[diff] [blame]

113

def create_scenario_jobspec(scenario_json, workers, remote_host=None,
                            bq_result_table=None):
    """Runs one scenario using QPS driver.

    Args:
      scenario_json: dict describing the scenario; must contain 'name'.
      workers: iterable of "host:port" strings, exported as QPS_WORKERS.
      remote_host: if set, the driver command is executed there over ssh.
      bq_result_table: optional value exported as BQ_RESULT_TABLE.

    Returns:
      A jobset.JobSpec running the driver script through the shell.
    """
    # setting QPS_WORKERS env variable here makes sure it works with SSH too.
    pieces = ['QPS_WORKERS="%s" ' % ','.join(workers)]
    if bq_result_table:
        pieces.append('BQ_RESULT_TABLE="%s" ' % bq_result_table)
    pieces.append('tools/run_tests/performance/run_qps_driver.sh ')
    scenarios_arg = pipes.quote(json.dumps({'scenarios': [scenario_json]}))
    pieces.append('--scenarios_json=%s ' % scenarios_arg)
    pieces.append('--scenario_result_file=scenario_result.json')
    driver_cmd = ''.join(pieces)

    if remote_host:
        ssh_target = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        driver_cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            ssh_target, pipes.quote(driver_cmd))

    return jobset.JobSpec(
        cmdline=[driver_cmd],
        shortname='qps_json_driver.%s' % scenario_json['name'],
        timeout_seconds=3*60,
        shell=True,
        verbose_success=True)

133

134

135

def create_quit_jobspec(workers, remote_host=None):
    """Runs quit using QPS driver.

    Args:
      workers: iterable of objects with a host_and_port attribute.
      remote_host: if set, the driver command is executed there over ssh.

    Returns:
      A jobset.JobSpec running 'qps_json_driver --quit' through the shell.
    """
    # setting QPS_WORKERS env variable here makes sure it works with SSH too.
    worker_addresses = ','.join(w.host_and_port for w in workers)
    quit_cmd = 'QPS_WORKERS="%s" bins/opt/qps_json_driver --quit' % worker_addresses

    if remote_host:
        ssh_target = '%s@%s' % (_REMOTE_HOST_USERNAME, remote_host)
        quit_cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            ssh_target, pipes.quote(quit_cmd))

    return jobset.JobSpec(
        cmdline=[quit_cmd],
        shortname='qps_json_driver.quit',
        timeout_seconds=3*60,
        shell=True,
        verbose_success=True)

149

150

Jan Tattermusch

2016-05-10 14:33:07 -0700

[diff] [blame]

151

def create_netperf_jobspec(server_host='localhost', client_host=None,
                           bq_result_table=None):
    """Runs netperf benchmark.

    Args:
      server_host: value exported as NETPERF_SERVER_HOST.
      client_host: if set, the benchmark script is executed there over ssh.
      bq_result_table: optional value exported as BQ_RESULT_TABLE.

    Returns:
      A jobset.JobSpec running run_netperf.sh through the shell.
    """
    pieces = ['NETPERF_SERVER_HOST="%s" ' % server_host]
    if bq_result_table:
        pieces.append('BQ_RESULT_TABLE="%s" ' % bq_result_table)

    if client_host:
        # If netperf is running remotely, the env variables populated by Jenkins
        # won't be available on the client, but we need them for uploading results
        # to BigQuery.
        forwarded = (('JOB_NAME', 'JOB_NAME="%s" '),
                     ('BUILD_NUMBER', 'BUILD_NUMBER="%s" '))
        for env_name, assignment in forwarded:
            env_value = os.getenv(env_name)
            if env_value:
                pieces.append(assignment % env_value)

    pieces.append('tools/run_tests/performance/run_netperf.sh')
    netperf_cmd = ''.join(pieces)

    if client_host:
        ssh_target = '%s@%s' % (_REMOTE_HOST_USERNAME, client_host)
        netperf_cmd = 'ssh %s "cd ~/performance_workspace/grpc/ && "%s' % (
            ssh_target, pipes.quote(netperf_cmd))

    return jobset.JobSpec(
        cmdline=[netperf_cmd],
        shortname='netperf',
        timeout_seconds=60,
        shell=True,
        verbose_success=True)

179

180

Jan Tattermusch

2016-04-18 09:21:37 -0700

[diff] [blame]

181

def archive_repo(languages):
    """Archives local version of repo including submodules.

    Creates ../grpc.tar from ../grpc/, adding ../grpc-java and ../grpc-go
    when 'java' / 'go' are among the given languages. Exits the process
    with status 1 if the archive job fails.
    """
    tar_argv = ['tar', '-cf', '../grpc.tar', '../grpc/']
    for lang_name, extra_dir in (('java', '../grpc-java'), ('go', '../grpc-go')):
        if lang_name in languages:
            tar_argv.append(extra_dir)

    archive_job = jobset.JobSpec(
        cmdline=tar_argv,
        shortname='archive_repo',
        timeout_seconds=3*60)

    jobset.message('START', 'Archiving local repository.', do_newline=True)
    num_failures, _ = jobset.run(
        [archive_job], newline_on_success=True, maxjobs=1)

    if num_failures != 0:
        jobset.message('FAILED', 'Failed to archive local repository.',
                       do_newline=True)
        sys.exit(1)

    jobset.message('SUCCESS',
                   'Archive with local repository created successfully.',
                   do_newline=True)

Jan Tattermusch

2016-04-27 17:55:27 -0700

[diff] [blame]

207

def prepare_remote_hosts(hosts, prepare_local=False):
    """Prepares remote hosts (and maybe prepare localhost as well).

    Runs remote_host_prepare.sh once per host (with USER_AT_HOST set) and,
    when prepare_local is true, kill_workers.sh locally. Exits the process
    with status 1 if any of these jobs fails.
    """
    prepare_timeout = 5*60

    prepare_jobs = []
    for host in hosts:
        prepare_jobs.append(jobset.JobSpec(
            cmdline=['tools/run_tests/performance/remote_host_prepare.sh'],
            shortname='remote_host_prepare.%s' % host,
            environ={'USER_AT_HOST': '%s@%s' % (_REMOTE_HOST_USERNAME, host)},
            timeout_seconds=prepare_timeout))

    if prepare_local:
        # Prepare localhost as well
        prepare_jobs.append(jobset.JobSpec(
            cmdline=['tools/run_tests/performance/kill_workers.sh'],
            shortname='local_prepare',
            timeout_seconds=prepare_timeout))

    jobset.message('START', 'Preparing hosts.', do_newline=True)
    num_failures, _ = jobset.run(
        prepare_jobs, newline_on_success=True, maxjobs=10)

    if num_failures != 0:
        jobset.message('FAILED', 'Failed to prepare remote hosts.',
                       do_newline=True)
        sys.exit(1)

    jobset.message('SUCCESS',
                   'Prepare step completed successfully.',
                   do_newline=True)

Craig Tiller

2016-04-04 13:49:29 -0700

[diff] [blame]

239

def build_on_remote_hosts(hosts, languages=scenario_config.LANGUAGES.keys(), build_local=False):
    """Builds performance worker on remote hosts (and maybe also locally).

    Args:
      hosts: iterable of remote host names to build on.
      languages: iterable of language names appended as arguments to the
        build scripts. Any iterable is accepted (list, set, dict keys view).
      build_local: when true, also run build_performance.sh locally.

    Exits the process with status 1 if any build job fails.
    """
    build_timeout = 15*60
    # Materialize once: list concatenation below requires a real list, which
    # a set or a dict keys view (the default on Python 3) is not.
    language_args = list(languages)

    build_jobs = []
    for host in hosts:
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, host)
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/remote_host_build.sh'] + language_args,
                shortname='remote_host_build.%s' % host,
                environ={'USER_AT_HOST': user_at_host, 'CONFIG': 'opt'},
                timeout_seconds=build_timeout))
    if build_local:
        # Build locally as well
        build_jobs.append(
            jobset.JobSpec(
                cmdline=['tools/run_tests/performance/build_performance.sh'] + language_args,
                shortname='local_build',
                environ={'CONFIG': 'opt'},
                timeout_seconds=build_timeout))

    jobset.message('START', 'Building.', do_newline=True)
    num_failures, _ = jobset.run(
        build_jobs, newline_on_success=True, maxjobs=10)
    if num_failures == 0:
        jobset.message('SUCCESS',
                       'Built successfully.',
                       do_newline=True)
    else:
        jobset.message('FAILED', 'Build failed.',
                       do_newline=True)
        sys.exit(1)

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

272

def create_qpsworkers(languages, worker_hosts, perf_cmd=None):
    """Creates QPS workers (but does not start them).

    Worker placement, applied per language:
      * no worker hosts: two local workers (base ports 10000 and 10010);
      * one worker host: two workers on that host (same base ports);
      * several hosts: one worker per host on base port 10000.
    Each language's worker_port_offset() is added to the base port.

    Returns:
      A list with one create_qpsworker_job() result per (language, worker).
    """
    if not worker_hosts:
        # run two workers locally (for each language)
        placements = [(None, 10000), (None, 10010)]
    elif len(worker_hosts) == 1:
        # run two workers on the remote host (for each language)
        placements = [(worker_hosts[0], 10000), (worker_hosts[0], 10010)]
    else:
        # run one worker per each remote host (for each language)
        placements = [(worker_host, 10000) for worker_host in worker_hosts]

    qps_jobs = []
    for language in languages:
        for worker_idx, (host, base_port) in enumerate(placements):
            job_name = 'qps_worker_%s_%s' % (language, worker_idx)
            job_port = base_port + language.worker_port_offset()
            qps_jobs.append(create_qpsworker_job(language,
                                                 shortname=job_name,
                                                 port=job_port,
                                                 remote_host=host,
                                                 perf_cmd=perf_cmd))
    return qps_jobs

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

292

293

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

294

def perf_report_processor_job(worker_host, perf_base_name, output_filename):
    """Creates a job spec that turns a worker's perf data into flame graphs.

    Args:
      worker_host: host the worker ran on; 'localhost' selects the local
        processing script, anything else the remote one (via USER_AT_HOST).
      perf_base_name: value exported as PERF_BASE_NAME.
      output_filename: value exported as OUTPUT_FILENAME.

    The output directory is read from the module-level parsed arguments
    (args.flame_graph_reports).

    Returns:
      A jobset.JobSpec running the processing script through the shell.
    """
    print('Creating perf report collection job for %s' % worker_host)
    # The env assignments and the script path are joined with an explicit
    # space instead of relying on whitespace after a backslash continuation
    # inside the string literal.
    if worker_host != 'localhost':
        user_at_host = '%s@%s' % (_REMOTE_HOST_USERNAME, worker_host)
        cmd = ('USER_AT_HOST=%s OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s '
               'tools/run_tests/performance/process_remote_perf_flamegraphs.sh'
               % (user_at_host, output_filename, args.flame_graph_reports,
                  perf_base_name))
    else:
        cmd = ('OUTPUT_FILENAME=%s OUTPUT_DIR=%s PERF_BASE_NAME=%s '
               'tools/run_tests/performance/process_local_perf_flamegraphs.sh'
               % (output_filename, args.flame_graph_reports, perf_base_name))

    # Pass the command as a one-element list, like every other shell=True
    # job spec in this file.
    return jobset.JobSpec(cmdline=[cmd],
                          timeout_seconds=3*60,
                          shell=True,
                          verbose_success=True,
                          shortname='process perf report')

312

313

Craig Tiller

2016-09-26 07:37:28 -0700

[diff] [blame]

314

# A runnable scenario: the job spec to execute, the worker jobs it uses,
# and the scenario's name.
Scenario = collections.namedtuple('Scenario', 'jobspec workers name')

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

315

316

Jan Tattermusch

2016-04-14 17:42:54 -0700

[diff] [blame]

317

def create_scenarios(languages, workers_by_lang, remote_host=None, regex='.*',
                     category='all', bq_result_table=None,
                     netperf=False, netperf_hosts=[]):
    """Create jobspecs for scenarios to run.

    Args:
      languages: iterable of language objects providing scenarios().
      workers_by_lang: dict mapping str(language) to that language's list
        of worker jobs.
      remote_host: host to run the QPS driver on, or None to run locally.
      regex: only scenarios whose name matches (re.search) are selected.
      category: only scenarios in this category are selected; 'all'
        selects every category.
      bq_result_table: optional table name forwarded to the job specs.
      netperf: when true, a netperf scenario is added first.
      netperf_hosts: hosts for netperf (server first, then client); only
        read, never mutated.

    Returns:
      A list of Scenario tuples.

    Raises:
      Exception: if a scenario sets both CLIENT_LANGUAGE and
        SERVER_LANGUAGE.
    """
    scenarios = []

    if netperf:
        if not netperf_hosts:
            netperf_server = 'localhost'
            netperf_client = None
        elif len(netperf_hosts) == 1:
            netperf_server = netperf_hosts[0]
            netperf_client = netperf_hosts[0]
        else:
            netperf_server = netperf_hosts[0]
            netperf_client = netperf_hosts[1]
        # The netperf scenario does not use any QPS workers.
        scenarios.append(Scenario(
            create_netperf_jobspec(server_host=netperf_server,
                                   client_host=netperf_client,
                                   bq_result_table=bq_result_table),
            [], 'netperf'))

    for language in languages:
        for scenario_json in language.scenarios():
            # Filter with the regex passed to this function, not the
            # module-level command line arguments.
            if not re.search(regex, scenario_json['name']):
                continue
            categories = scenario_json.get('CATEGORIES', ['scalable', 'smoketest'])
            if category != 'all' and category not in categories:
                continue

            # Copy, so replacing workers below does not alter workers_by_lang.
            workers = workers_by_lang[str(language)][:]
            # 'SERVER_LANGUAGE' is an indicator for this script to pick
            # a server in different language.
            custom_server_lang = scenario_json.get('SERVER_LANGUAGE', None)
            custom_client_lang = scenario_json.get('CLIENT_LANGUAGE', None)
            scenario_json = scenario_config.remove_nonproto_fields(scenario_json)
            if custom_server_lang and custom_client_lang:
                raise Exception('Cannot set both custom CLIENT_LANGUAGE and SERVER_LANGUAGE '
                                'in the same scenario')
            if custom_server_lang:
                if not workers_by_lang.get(custom_server_lang, []):
                    print('Warning: Skipping scenario %s as' % scenario_json['name'])
                    print('SERVER_LANGUAGE is set to %s yet the language has '
                          'not been selected with -l' % custom_server_lang)
                    continue
                for idx in range(0, scenario_json['num_servers']):
                    # replace first X workers by workers of a different language
                    workers[idx] = workers_by_lang[custom_server_lang][idx]
            if custom_client_lang:
                if not workers_by_lang.get(custom_client_lang, []):
                    print('Warning: Skipping scenario %s as' % scenario_json['name'])
                    print('CLIENT_LANGUAGE is set to %s yet the language has '
                          'not been selected with -l' % custom_client_lang)
                    continue
                for idx in range(scenario_json['num_servers'], len(workers)):
                    # replace all client workers by workers of a different language,
                    # leave num_server workers as they are server workers.
                    workers[idx] = workers_by_lang[custom_client_lang][idx]
            scenarios.append(Scenario(
                create_scenario_jobspec(scenario_json,
                                        [w.host_and_port for w in workers],
                                        remote_host=remote_host,
                                        bq_result_table=bq_result_table),
                workers,
                scenario_json['name']))

    return scenarios

def finish_qps_workers(jobs, poll_interval_seconds=3, max_retries=10):
    """Waits for given jobs to finish and eventually kills them.

    Args:
      jobs: sequence of worker jobs providing is_running(), kill() and a
        host_and_port attribute.
      poll_interval_seconds: time to sleep between polls.
      max_retries: once the number of completed polls exceeds this value,
        all jobs are killed.

    Returns:
      The number of kill() calls issued.
    """
    retries = 0
    num_killed = 0
    while any(job.is_running() for job in jobs):
        # Report on the jobs passed in, not on a module-level job list.
        for job in jobs:
            if job.is_running():
                print('QPS worker "%s" is still running.' % job.host_and_port)
        if retries > max_retries:
            print('Killing all QPS workers.')
            for job in jobs:
                job.kill()
                num_killed += 1
        retries += 1
        time.sleep(poll_interval_seconds)
    print('All QPS workers finished.')
    return num_killed

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

405

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

406

# Names of the ".svg" flame graph files produced so far; appended to by
# run_collect_perf_profile_jobs().
profile_output_files = []

407

408

# Collect perf text reports and flamegraphs if perf_cmd was used

409

# Note the base names of perf text reports are used when creating and processing

410

# perf data. The scenario name uniqifies the output name in the final

411

# perf reports directory.

412

# Also, the perf profiles need to be fetched and processed after each scenario

413

# in order to avoid clobbering the output files.

414

def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name):
    """Runs one perf report processing job per worker and returns failures.

    Args:
      hosts_and_base_names: dict mapping a worker's "host:port" string to
        its perf file base name.
      scenario_name: prefix that makes the output file names unique.

    Returns:
      The failure count reported by jobset.run().

    Side effect: appends each '<scenario>-<base>.svg' name to the
    module-level profile_output_files list.
    """
    global profile_output_files
    report_jobs = []
    for worker_address, base_name in hosts_and_base_names.items():
        out_name = '%s-%s' % (scenario_name, base_name)
        # from the base filename, create .svg output filename
        worker_host = worker_address.split(':')[0]
        profile_output_files.append('%s.svg' % out_name)
        report_jobs.append(
            perf_report_processor_job(worker_host, base_name, out_name))

    jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True)
    failures, _ = jobset.run(report_jobs, newline_on_success=True, maxjobs=1)
    jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True)
    return failures

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

431

# Command line interface. The parsed result is stored in the module-level
# "args", which several functions in this file read directly.
argp = argparse.ArgumentParser(description='Run performance tests.')
argp.add_argument('-l', '--language',
                  choices=['all'] + sorted(scenario_config.LANGUAGES.keys()),
                  nargs='+',
                  required=True,
                  help='Languages to benchmark.')
argp.add_argument('--remote_driver_host',
                  default=None,
                  help='Run QPS driver on given host. By default, QPS driver is run locally.')
argp.add_argument('--remote_worker_host',
                  nargs='+',
                  default=[],
                  help='Worker hosts where to start QPS workers.')
argp.add_argument('--dry_run',
                  default=False,
                  action='store_const',
                  const=True,
                  help='Just list scenarios to be run, but don\'t run them.')
argp.add_argument('-r', '--regex', default='.*', type=str,
                  help='Regex to select scenarios to run.')
argp.add_argument('--bq_result_table', default=None, type=str,
                  help='Bigquery "dataset.table" to upload results to.')
argp.add_argument('--category',
                  choices=['smoketest','all','scalable','sweep'],
                  default='all',
                  help='Select a category of tests to run.')
argp.add_argument('--netperf',
                  default=False,
                  action='store_const',
                  const=True,
                  help='Run netperf benchmark as one of the scenarios.')
argp.add_argument('-x', '--xml_report', default='report.xml', type=str,
                  help='Name of XML report file to generate.')
# The help text refers to the flag by its real name, "--perf_args".
argp.add_argument('--perf_args',
                  help=('Example usage: "--perf_args=record -F 99 -g". '
                        'Wrap QPS workers in a perf command '
                        'with the arguments to perf specified here. '
                        '".svg" flame graph profiles will be '
                        'created for each Qps Worker on each scenario. '
                        'Files will output to "<repo_root>/<args.flame_graph_reports>" '
                        'directory. Output files from running the worker '
                        'under perf are saved in the repo root where its ran. '
                        'Note that the perf "-g" flag is necessary for '
                        'flame graphs generation to work (assuming the binary '
                        'being profiled uses frame pointers, check out '
                        '"--call-graph dwarf" option using libunwind otherwise.) '
                        'Also note that the entire "--perf_args=<arg(s)>" must '
                        'be wrapped in quotes as in the example usage. '
                        'If the "--perf_args" is unspecified, "perf" will '
                        'not be used at all. '
                        'See http://www.brendangregg.com/perf.html '
                        'for more general perf examples.'))
argp.add_argument('--skip_generate_flamegraphs',
                  default=False,
                  action='store_const',
                  const=True,
                  help=('Turn flame graph generation off. '
                        'May be useful if "perf_args" arguments do not make sense for '
                        'generating flamegraphs (e.g., "--perf_args=stat ...")'))
argp.add_argument('-f', '--flame_graph_reports', default='perf_reports', type=str,
                  help='Name of directory to output flame graph profiles to, if any are created.')

args = argp.parse_args()

495

Craig Tiller

d92d5c5

2016-04-04 13:49:29 -0700

[diff] [blame]

496

# Resolve the selected language names ('all' expands to every configured
# language) to their scenario_config language objects. The dict is iterated
# directly rather than via the Python-2-only iterkeys().
languages = set(scenario_config.LANGUAGES[name]
                for name in itertools.chain.from_iterable(
                    scenario_config.LANGUAGES if selected == 'all' else [selected]
                    for selected in args.language))


# Put together set of remote hosts where to run and build
remote_hosts = set(args.remote_worker_host or [])
if args.remote_driver_host:
    remote_hosts.add(args.remote_driver_host)

if not args.dry_run:
    if remote_hosts:
        # The repo archive is only created when there are remote hosts.
        archive_repo(languages=[str(language) for language in languages])
    # With no remote hosts this only performs the local prepare step.
    prepare_remote_hosts(remote_hosts, prepare_local=True)

# Build locally only when the QPS driver is going to run locally.
build_local = not args.remote_driver_host
if not args.dry_run:
    build_on_remote_hosts(remote_hosts,
                          languages=[str(language) for language in languages],
                          build_local=build_local)

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

522

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

523

# Optional argv prefix that wraps every QPS worker in a perf command.
perf_cmd = None
if args.perf_args:
    print('Running workers under perf profiler')
    # Expect /usr/bin/perf to be installed here, as is usual
    perf_cmd = ['/usr/bin/perf']
    # str.split() with no separator splits on runs of whitespace and, unlike
    # re.split('\s+', ...), yields no empty strings for leading or trailing
    # whitespace (which would be passed to perf as empty arguments).
    perf_cmd.extend(args.perf_args.split())

qpsworker_jobs = create_qpsworkers(languages, args.remote_worker_host, perf_cmd=perf_cmd)

Jan Tattermusch

2016-04-14 08:00:35 -0700

[diff] [blame]

531

Jan Tattermusch

2016-03-30 18:04:01 -0700

[diff] [blame]

532

# get list of worker addresses for each language.

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

533

# Start every requested language (keyed by its string form) with an empty list.
workers_by_lang = {str(lang): [] for lang in languages}

Jan Tattermusch

2016-03-30 18:04:01 -0700

[diff] [blame]

534

for job in qpsworker_jobs:

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

535

workers_by_lang[str(job.language)].append(job)

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

536

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

537

scenarios = create_scenarios(languages,

Craig Tiller

d82fccc

2016-09-15 09:15:23 -0700

[diff] [blame]

538

workers_by_lang=workers_by_lang,

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

539

remote_host=args.remote_driver_host,

540

regex=args.regex,

541

category=args.category,

542

bq_result_table=args.bq_result_table,

543

netperf=args.netperf,

544

netperf_hosts=args.remote_worker_host)

Jan Tattermusch

2016-05-10 14:33:07 -0700

[diff] [blame]

545

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

546

if not scenarios:

547

raise Exception('No scenarios to run')

Jan Tattermusch

2016-03-28 09:32:20 -0700

[diff] [blame]

548

Alex Polcyn

cac93f6

2016-10-19 09:27:57 -0700

[diff] [blame]

549

total_scenario_failures = 0

Alexander Polcyn

898a2e9

2016-10-22 17:41:23 -0700

[diff] [blame]

550

qps_workers_killed = 0

Jan Tattermusch

2016-10-24 21:06:40 +0200

[diff] [blame]

551

merged_resultset = {}

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

552

perf_report_failures = 0

553

Craig Tiller

2016-09-15 08:57:14 -0700

[diff] [blame]

554

for scenario in scenarios:

Craig Tiller

2016-09-26 07:37:28 -0700

[diff] [blame]

555

if args.dry_run:

556

print(scenario.name)

557

else:

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

558

scenario_failures = 0

Craig Tiller

2016-09-26 07:37:28 -0700

[diff] [blame]

559

try:

560

for worker in scenario.workers:

561

worker.start()

Alex Polcyn

fcf09ea

2016-12-06 04:00:05 +0000

[diff] [blame]

562

jobs = [scenario.jobspec]

Alex Polcyn

ca5e924

2016-12-06 04:21:37 +0000

[diff] [blame]

563

if scenario.workers:

Alex Polcyn

fcf09ea

2016-12-06 04:00:05 +0000

[diff] [blame]

564

jobs.append(create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host))

565

scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1)

Alex Polcyn

cac93f6

2016-10-19 09:27:57 -0700

[diff] [blame]

566

total_scenario_failures += scenario_failures

Jan Tattermusch

2016-10-24 21:06:40 +0200

[diff] [blame]

567

merged_resultset = dict(itertools.chain(merged_resultset.iteritems(),

568

resultset.iteritems()))

Craig Tiller

2016-09-26 07:37:28 -0700

[diff] [blame]

569

finally:

Alexander Polcyn

898a2e9

2016-10-22 17:41:23 -0700

[diff] [blame]

570

# Consider qps workers that need to be killed as failures

571

qps_workers_killed += finish_qps_workers(scenario.workers)

Alexander Polcyn

2016-10-17 10:01:37 -0700

[diff] [blame]

572

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

573

if perf_cmd and scenario_failures == 0 and not args.skip_generate_flamegraphs:

574

workers_and_base_names = {}

575

for worker in scenario.workers:

576

if not worker.perf_file_base_name:

577

# A worker without a perf file base name cannot be mapped to a report below,
# so abort rather than continue with an incomplete mapping.
raise Exception('using perf but perf report filename is unspecified')

578

workers_and_base_names[worker.host_and_port] = worker.perf_file_base_name

579

perf_report_failures += run_collect_perf_profile_jobs(workers_and_base_names, scenario.name)

580

581

582

# Still write the index.html even if some scenarios failed.

583

# 'profile_output_files' will only have names for scenarios that passed

584

if perf_cmd and not args.skip_generate_flamegraphs:

585

# write the index file to the output dir, with all profiles from all scenarios/workers

Alexander Polcyn

2016-12-09 10:22:50 -0800

[diff] [blame^]

586

report_utils.render_perf_profiling_results('%s/index.html' % args.flame_graph_reports, profile_output_files)

Alexander Polcyn

2016-10-24 12:25:02 -0700

[diff] [blame]

587

588

if total_scenario_failures > 0 or qps_workers_killed > 0:

589

print('%s scenarios failed and %s qps worker jobs killed' % (total_scenario_failures, qps_workers_killed))

590

sys.exit(1)

Jan Tattermusch

2016-10-24 21:06:40 +0200

[diff] [blame]

591

Jan Tattermusch

88818ae

2016-11-18 14:21:33 +0100

[diff] [blame]

592

report_utils.render_junit_xml_report(merged_resultset, args.xml_report,

Jan Tattermusch

2016-10-24 21:06:40 +0200

[diff] [blame]

593

suite_name='benchmarks')

Alexander Polcyn