Nathaniel Manista | cbf21da | 2016-02-02 22:17:44 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python2.7 |
Craig Tiller | 1f980a8 | 2016-01-15 16:11:48 -0800 | [diff] [blame] | 2 | # Copyright 2015-2016, Google Inc. |
Sree Kuchibhotla | 1b38bb4 | 2015-12-14 17:22:38 -0800 | [diff] [blame] | 3 | # All rights reserved. |
| 4 | # |
| 5 | # Redistribution and use in source and binary forms, with or without |
| 6 | # modification, are permitted provided that the following conditions are |
| 7 | # met: |
| 8 | # |
| 9 | # * Redistributions of source code must retain the above copyright |
| 10 | # notice, this list of conditions and the following disclaimer. |
| 11 | # * Redistributions in binary form must reproduce the above |
| 12 | # copyright notice, this list of conditions and the following disclaimer |
| 13 | # in the documentation and/or other materials provided with the |
| 14 | # distribution. |
| 15 | # * Neither the name of Google Inc. nor the names of its |
| 16 | # contributors may be used to endorse or promote products derived from |
| 17 | # this software without specific prior written permission. |
| 18 | # |
| 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | """Run stress test in C++""" |
| 31 | |
| 32 | import argparse |
| 33 | import atexit |
| 34 | import dockerjob |
| 35 | import itertools |
| 36 | import jobset |
| 37 | import json |
| 38 | import multiprocessing |
| 39 | import os |
| 40 | import re |
Sree Kuchibhotla | 1b38bb4 | 2015-12-14 17:22:38 -0800 | [diff] [blame] | 41 | import subprocess |
| 42 | import sys |
| 43 | import tempfile |
| 44 | import time |
| 45 | import uuid |
| 46 | |
def _restore_terminal_echo():
    """Re-enable terminal echo by running `stty echo`."""
    subprocess.call(['stty', 'echo'])


# Docker doesn't clean up after itself, so we do it on exit.
atexit.register(_restore_terminal_echo)

# The repository root is two directory levels above the directory that holds
# this script (as named by sys.argv[0]); all relative paths used below are
# resolved against it.
_SCRIPT_DIR = os.path.dirname(sys.argv[0])
ROOT = os.path.abspath(os.path.join(_SCRIPT_DIR, '../..'))
os.chdir(ROOT)
| 52 | |
# Port passed to the interop server via --port.
_DEFAULT_SERVER_PORT = 8080
# Port passed to the stress test client via --metrics_port.
_DEFAULT_METRICS_PORT = 8081
# Value of the client's --test_cases flag: comma-separated `name:number`
# pairs (the number is presumably a relative weight -- confirm against the
# stress test client).
_DEFAULT_TEST_CASES = ('empty_unary:20,'
                       'large_unary:20,'
                       'client_streaming:20,'
                       'server_streaming:20,'
                       'empty_stream:20')
_DEFAULT_NUM_CHANNELS_PER_SERVER = 5
_DEFAULT_NUM_STUBS_PER_CHANNEL = 10

# 15 mins default
_DEFAULT_TEST_DURATION_SECS = 15 * 60
Sree Kuchibhotla | 1b38bb4 | 2015-12-14 17:22:38 -0800 | [diff] [blame] | 61 | |
class CXXLanguage:
    """Describes how to launch the C++ stress test client and interop server."""

    def __init__(self):
        # Name used when composing docker image and container names.
        self.safename = 'cxx'
        # No working-directory override for either the client or the server.
        self.client_cwd = None
        self.server_cwd = None

    def client_cmd(self, args):
        """Return the stress test client command line followed by `args`."""
        client_binary = 'bins/opt/stress_test'
        return [client_binary] + args

    def server_cmd(self, args):
        """Return the interop server command line followed by `args`."""
        server_binary = 'bins/opt/interop_server'
        return [server_binary] + args

    def global_env(self):
        """Return the extra environment variables for this language (none)."""
        return dict()

    def __str__(self):
        return 'c++'
| 80 | |
| 81 | |
# Client languages this script can run, keyed by the name accepted on the
# command line (which is also what str() of the language object returns).
_LANGUAGES = {
    'c++': CXXLanguage(),
}

# languages supported as cloud_to_cloud servers
_SERVERS = ['c++']
| 86 | |
# Directory inside the docker image that serves as the base working directory.
DOCKER_WORKDIR_ROOT = '/var/local/git/grpc'


def docker_run_cmdline(cmdline, image, docker_args=None, cwd=None, environ=None):
    """Wraps given cmdline array to create 'docker run' cmdline from it.

    Args:
      cmdline: list of arguments to execute inside the container.
      image: name of the docker image to run.
      docker_args: optional iterable of extra arguments placed before the
        image name. Defaults to no extra arguments.
      cwd: optional directory, joined onto DOCKER_WORKDIR_ROOT, to use as the
        container's working directory.
      environ: optional dict of environment variables, each passed to docker
        as `-e KEY=VALUE`.

    Returns:
      A new list: the complete 'docker run' command line.
    """
    docker_cmdline = ['docker', 'run', '-i', '--rm=true']

    # turn environ into -e docker args
    if environ:
        for k, v in environ.items():
            docker_cmdline += ['-e', '%s=%s' % (k, v)]

    # set working directory
    workdir = DOCKER_WORKDIR_ROOT
    if cwd:
        workdir = os.path.join(workdir, cwd)
    docker_cmdline += ['-w', workdir]

    # A None default (instead of a shared mutable []) is normalized here;
    # copying also lets callers pass any iterable, not just a list.
    extra_args = list(docker_args) if docker_args else []
    docker_cmdline += extra_args + [image] + cmdline
    return docker_cmdline
| 107 | |
| 108 | |
def bash_login_cmdline(cmdline):
    """Wraps an args list into a single `bash -l -c` invocation.

    The arguments are joined with single spaces and handed to bash as one
    command string; no quoting or escaping is applied.
    """
    # A login shell is used because:
    # * rvm and nvm require it
    # * it makes error messages clearer if executables are missing
    shell_command = ' '.join(cmdline)
    return ['bash', '-l', '-c', shell_command]
| 115 | |
| 116 | |
def _job_kill_handler(job):
    """Kill the docker container belonging to `job`, if it has one.

    Does nothing when the job's spec has no (truthy) `container_name`.
    """
    container = job._spec.container_name
    if not container:
        return
    dockerjob.docker_kill(container)
    # When the job times out and we decide to kill it,
    # we need to wait a bit before restarting the job
    # to prevent "container name already in use" error.
    # TODO(jtattermusch): figure out a cleaner way to do this.
    time.sleep(2)
| 125 | |
| 126 | |
def cloud_to_cloud_jobspec(language,
                           test_cases,
                           server_addresses,
                           test_duration_secs,
                           num_channels_per_server,
                           num_stubs_per_channel,
                           metrics_port,
                           docker_image=None):
    """Creates jobspec for cloud-to-cloud interop test.

    Args:
      language: language object providing client_cmd(), client_cwd,
        global_env() and safename.
      test_cases, server_addresses, test_duration_secs,
      num_channels_per_server, num_stubs_per_channel, metrics_port: values
        forwarded verbatim to the client as the same-named `--flag=value`
        arguments.
      docker_image: optional docker image. When given, the client is run via
        'docker run' with --net=host under a randomly generated container
        name; otherwise it is run directly.

    Returns:
      A jobset.JobSpec whose timeout is twice the test duration, with no
      flake or timeout retries. Its `container_name` attribute holds the
      docker container name, or None when no docker image is used.
    """
    client_args = [
        '--test_cases=%s' % test_cases,
        '--server_addresses=%s' % server_addresses,
        '--test_duration_secs=%s' % test_duration_secs,
        '--num_stubs_per_channel=%s' % num_stubs_per_channel,
        '--num_channels_per_server=%s' % num_channels_per_server,
        '--metrics_port=%s' % metrics_port,
    ]
    cmdline = bash_login_cmdline(language.client_cmd(client_args))
    print(cmdline)
    cwd = language.client_cwd
    environ = language.global_env()
    # Bound up front so the attribute assignment below is valid even when the
    # client is not run inside docker.
    container_name = None
    if docker_image:
        container_name = dockerjob.random_name('interop_client_%s' %
                                               language.safename)
        cmdline = docker_run_cmdline(
            cmdline,
            image=docker_image,
            environ=environ,
            cwd=cwd,
            docker_args=['--net=host', '--name', container_name])
        # The working directory is applied inside the container instead.
        cwd = None

    # NOTE: `server_name` is not a parameter of this function; it is read from
    # module scope, where the driver loop binds it before calling us.
    test_job = jobset.JobSpec(
        cmdline=cmdline,
        cwd=cwd,
        environ=environ,
        shortname='cloud_to_cloud:%s:%s_server:stress_test' % (
            language, server_name),
        timeout_seconds=test_duration_secs * 2,
        flake_retries=0,
        timeout_retries=0,
        kill_handler=_job_kill_handler)
    test_job.container_name = container_name
    return test_job
| 168 | |
| 169 | |
def server_jobspec(language, docker_image, test_duration_secs):
    """Create jobspec for running a server.

    The server is always run through 'docker run' using `docker_image`, under
    a randomly generated container name, listening on _DEFAULT_SERVER_PORT.
    The job's timeout is three times `test_duration_secs`. The returned
    JobSpec carries the container name in its `container_name` attribute.
    """
    name_prefix = 'interop_server_%s' % language.safename
    container_name = dockerjob.random_name(name_prefix)

    port_flag = '--port=%s' % _DEFAULT_SERVER_PORT
    server_cmdline = bash_login_cmdline(language.server_cmd([port_flag]))

    environ = language.global_env()
    extra_docker_args = [
        '-p', str(_DEFAULT_SERVER_PORT),
        '--name', container_name,
    ]
    docker_cmdline = docker_run_cmdline(server_cmdline,
                                        image=docker_image,
                                        cwd=language.server_cwd,
                                        environ=environ,
                                        docker_args=extra_docker_args)

    server_job = jobset.JobSpec(cmdline=docker_cmdline,
                                environ=environ,
                                shortname='interop_server_%s' % language,
                                timeout_seconds=test_duration_secs * 3)
    server_job.container_name = container_name
    return server_job
| 190 | |
| 191 | |
def build_interop_stress_image_jobspec(language, tag=None):
    """Creates jobspec for building stress test docker image for a language.

    When `tag` is not supplied, a unique one is generated from the language's
    safename and a random UUID. The chosen tag is exposed on the returned
    JobSpec as its `tag` attribute. The build is given a 30 minute timeout.
    """
    tag = tag or 'grpc_interop_stress_%s:%s' % (language.safename,
                                                 uuid.uuid4())
    # Environment handed to the build script.
    build_env = {
        'INTEROP_IMAGE': tag,
        'BASE_NAME': 'grpc_interop_stress_%s' % language.safename,
    }
    build_job = jobset.JobSpec(
        cmdline=['tools/jenkins/build_interop_stress_image.sh'],
        environ=build_env,
        shortname='build_docker_%s' % (language),
        timeout_seconds=30 * 60)
    build_job.tag = tag
    return build_job
| 204 | |
# Command-line interface.
argp = argparse.ArgumentParser(description='Run stress tests.')
argp.add_argument('-l',
                  '--language',
                  choices=['all'] + sorted(_LANGUAGES),
                  nargs='+',
                  default=['all'],
                  help='Clients to run.')
argp.add_argument('-j', '--jobs', default=multiprocessing.cpu_count(), type=int)
argp.add_argument(
    '-s',
    '--server',
    choices=['all'] + sorted(_SERVERS),
    action='append',
    help='Run cloud_to_cloud servers in a separate docker image.',
    default=[])
# Each occurrence is split on '=' into [servername, 'HOST:PORT'].
argp.add_argument(
    '--override_server',
    action='append',
    type=lambda kv: kv.split('='),
    help='Use servername=HOST:PORT to explicitly specify a server. E.g. '
    'csharp=localhost:50000',
    default=[])
# type=int is required: the duration is multiplied to derive job timeouts, and
# a value given on the command line would otherwise arrive as a string (for
# which `*` means repetition, not arithmetic).
argp.add_argument('--test_duration_secs',
                  type=int,
                  help='The duration of the test in seconds',
                  default=_DEFAULT_TEST_DURATION_SECS)

args = argp.parse_args()
| 233 | |
# Expand the requested server names; 'all' stands for every entry of _SERVERS.
servers = set()
for requested in args.server:
    servers.update(_SERVERS if requested == 'all' else [requested])

# Expand the requested client languages into their language objects; 'all'
# stands for every entry of _LANGUAGES.
languages = set()
for requested in args.language:
    names = list(_LANGUAGES.keys()) if requested == 'all' else [requested]
    languages.update(_LANGUAGES[name] for name in names)

docker_images = {}
# languages for which to build docker images
names_to_build = set(str(lang) for lang in languages) | set(servers)
languages_to_build = set(_LANGUAGES[name] for name in names_to_build)
build_jobs = []
for lang in languages_to_build:
    build_job = build_interop_stress_image_jobspec(lang)
    docker_images[str(lang)] = build_job.tag
    build_jobs.append(build_job)

if build_jobs:
    jobset.message('START', 'Building interop docker images.', do_newline=True)
    num_failures, _ = jobset.run(build_jobs,
                                 newline_on_success=True,
                                 maxjobs=args.jobs)
    if num_failures != 0:
        jobset.message('FAILED',
                       'Failed to build interop docker images.',
                       do_newline=True)
        # Remove whatever images did get built before giving up.
        for image in docker_images.values():
            dockerjob.remove_image(image, skip_nonexistent=True)
        sys.exit(1)
    jobset.message('SUCCESS',
                   'All docker images built successfully.',
                   do_newline=True)
| 270 | |
# Start interop servers.
server_jobs = {}
server_addresses = {}
try:
    for s in servers:
        lang = str(s)
        spec = server_jobspec(_LANGUAGES[lang], docker_images.get(lang),
                              args.test_duration_secs)
        job = dockerjob.DockerJob(spec)
        server_jobs[lang] = job
        server_addresses[lang] = ('localhost',
                                  job.mapped_port(_DEFAULT_SERVER_PORT))

    jobs = []

    # Explicitly specified servers are added to (or replace) the addresses of
    # the servers started above.
    for server in args.override_server:
        server_name = server[0]
        (server_host, server_port) = server[1].split(':')
        server_addresses[server_name] = (server_host, server_port)

    # One client job per (server, client language) pair. The loop variable
    # `server_name` is read by cloud_to_cloud_jobspec from module scope, so it
    # must keep this name.
    for server_name, server_address in server_addresses.items():
        (server_host, server_port) = server_address
        for language in languages:
            test_job = cloud_to_cloud_jobspec(
                language,
                _DEFAULT_TEST_CASES,
                ('%s:%s' % (server_host, server_port)),
                args.test_duration_secs,
                _DEFAULT_NUM_CHANNELS_PER_SERVER,
                _DEFAULT_NUM_STUBS_PER_CHANNEL,
                _DEFAULT_METRICS_PORT,
                docker_image=docker_images.get(str(language)))
            jobs.append(test_job)

    if not jobs:
        print('No jobs to run.')
        # Docker images are removed exactly once, by the finally clause below
        # (it still runs when sys.exit() raises SystemExit), after the server
        # containers have been finished.
        sys.exit(1)

    num_failures, resultset = jobset.run(jobs,
                                         newline_on_success=True,
                                         maxjobs=args.jobs)
    if num_failures:
        jobset.message('FAILED', 'Some tests failed', do_newline=True)
    else:
        jobset.message('SUCCESS', 'All tests passed', do_newline=True)

finally:
    # Check if servers are still running.
    for server, job in server_jobs.items():
        if not job.is_running():
            print('Server "%s" has exited prematurely.' % server)

    dockerjob.finish_jobs(list(server_jobs.values()))

    for image in docker_images.values():
        print('Removing docker image %s' % image)
        dockerjob.remove_image(image)