Blame - server/autoserv - platform/external/autotest

2014-10-23 13:56:41 -0700

[diff] [blame]

import ast

import datetime

import getpass

import logging

import os

import re

import signal

Dan Shi

2015-03-12 15:04:21 -0700

[diff] [blame]

16

import socket

Fang Deng

2014-10-23 13:56:41 -0700

[diff] [blame]

import sys

import traceback

import time

import urllib2

mbligh

1ffd5dc

2008-11-25 13:24:05 +0000

[diff] [blame]

21

mbligh

f5427bb

2008-04-09 15:55:57 +0000

[diff] [blame]

22

import common

mbligh

9ff89cd

2009-09-03 20:28:17 +0000

[diff] [blame]

23

Dan Shi

2013-06-06 11:21:31 -0700

[diff] [blame]

24

from autotest_lib.client.common_lib import control_data

25

from autotest_lib.client.common_lib import global_config

Dan Shi

5ddf6a3

2015-05-02 00:22:01 -0700

[diff] [blame]

26

from autotest_lib.client.common_lib import utils

Dan Shi

2015-04-13 15:46:47 -0700

[diff] [blame]

27

from autotest_lib.client.common_lib.cros.graphite import autotest_es

28

from autotest_lib.client.common_lib.cros.graphite import autotest_stats

Prashanth Balasubramanian

2014-11-06 15:58:21 -0800

[diff] [blame]

29

try:

30

from autotest_lib.puppylab import results_mocker

31

except ImportError:

32

results_mocker = None

33

Dan Shi

2013-06-06 11:21:31 -0700

[diff] [blame]

34

require_atfork = global_config.global_config.get_config_value(

mbligh

cb8cb33

2009-09-03 21:08:56 +0000

[diff] [blame]

35

'AUTOSERV', 'require_atfork_module', type=bool, default=True)

36

Dan Shi

2013-06-06 11:21:31 -0700

[diff] [blame]

37

Jakob Juelich

e497b55

2014-09-23 19:11:59 -0700

[diff] [blame]

38

# Number of seconds to wait before returning if testing mode is enabled

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

39

TESTING_MODE_SLEEP_SECS = 1

Jakob Juelich

e497b55

2014-09-23 19:11:59 -0700

[diff] [blame]

40

mbligh

cb8cb33

2009-09-03 21:08:56 +0000

[diff] [blame]

41

# Optional python-atfork set-up, performed at import time. When the atfork
# package can be imported, its os.fork() monkeypatch is installed and the
# logging module is fixed up for threading+fork safety.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks. http://code.google.com/p/python-atfork/
    import warnings
    # Silence the 'logging module already imported' warning before fixing the
    # logging module (logging is imported at the top of this file).
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # atfork is missing. This is only fatal when the global config option
    # AUTOSERV/require_atfork_module is set; the default used here is False.
    # NOTE(review): the module-level require_atfork value read earlier in this
    # file uses the same key with default=True and is not consulted here --
    # confirm which default is intended.
    from autotest_lib.client.common_lib import global_config
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print e
        sys.exit(1)

mbligh

9ff89cd

2009-09-03 20:28:17 +0000

[diff] [blame]

57

Dan Shi

2013-06-06 11:21:31 -0700

[diff] [blame]

58

from autotest_lib.server import frontend

showard

2009-06-10 17:40:41 +0000

[diff] [blame]

59

from autotest_lib.server import server_logging_config

showard

043c62a

2009-06-10 19:48:57 +0000

[diff] [blame]

60

from autotest_lib.server import server_job, utils, autoserv_parser, autotest

Dan Shi

2013-06-06 11:21:31 -0700

[diff] [blame]

61

from autotest_lib.server import utils as server_utils

Dan Shi

2015-03-12 15:04:21 -0700

[diff] [blame]

62

from autotest_lib.site_utils import job_directories

Fang Deng

2014-10-23 13:56:41 -0700

[diff] [blame]

63

from autotest_lib.site_utils import job_overhead

Dan Shi

2015-03-12 15:04:21 -0700

[diff] [blame]

64

from autotest_lib.site_utils import lxc

Dan Shi

7836d25

2015-04-27 15:33:58 -0700

[diff] [blame]

65

from autotest_lib.site_utils import lxc_utils

showard

2009-06-10 17:40:41 +0000

[diff] [blame]

66

from autotest_lib.client.common_lib import pidfile, logging_manager

Gabe Black

1e1c41b

2015-02-04 23:55:15 -0800

[diff] [blame]

67

from autotest_lib.client.common_lib.cros.graphite import autotest_stats

mbligh

92c0fc2

2008-11-20 16:52:23 +0000

[diff] [blame]

68

Dan Shi

2015-03-12 15:04:21 -0700

[diff] [blame]

69

# Control segment to stage server-side package.

70

STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(

71

'stage_server_side_package')

72

Alex Miller

f1af17e

2013-01-09 22:50:32 -0800

[diff] [blame]

73

def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate autoserv.

    @param signum: Number of the delivered signal (unused).
    @param frame: Stack frame that was active when the signal arrived (unused).

    @raises SystemExit: Always, with exit status 1.
    """
    # The handler does not carry on after the alarm, so the log line must not
    # claim that the signal is being ignored.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)

Alex Miller

f1af17e

2013-01-09 22:50:32 -0800

[diff] [blame]

76

Dan Shi

2015-03-12 15:04:21 -0700

[diff] [blame]

77

78

def _get_machines(parser):

79

"""Get a list of machine names from command line arg -m or a file.

80

81

@param parser: Parser for the command line arguments.

82

83

@return: A list of machine names from command line arg -m or the

84

machines file specified in the command line arg -M.

85

"""

86

if parser.options.machines:

87

machines = parser.options.machines.replace(',', ' ').strip().split()

88

else:

89

machines = []

90

machines_file = parser.options.machines_file

91

if machines_file:

92

machines = []

93

for m in open(machines_file, 'r').readlines():

94

# remove comments, spaces

95

m = re.sub('#.*', '', m).strip()

96

if m:

97

machines.append(m)

98

logging.debug('Read list of machines from file: %s', machines_file)

99

logging.debug('Machines: %s', ','.join(machines))

100

101

if machines:

102

for machine in machines:

103

if not machine or re.search('\s', machine):

104

parser.parser.error("Invalid machine: %s" % str(machine))

105

machines = list(set(machines))

machines.sort()

return machines

def _stage_ssp(parser):
    """Stage server-side package.

    Executes the control segment STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE with
    `machines` and `image` available as globals and hands back the `ssp_url`
    value the segment leaves in its local namespace. The detail implementation
    could be different for each host type. Currently, only CrosHost has
    stage_server_side_package function defined.
    The script returns None if no server-side package is available. However,
    it may raise exception if it failed for reasons other than artifact (the
    server-side package) not found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    machines = _get_machines(parser)
    # If test_source_build is not specified, default to use server-side test
    # code from build specified in --image.
    image = parser.options.test_source_build or parser.options.image
    segment_globals = {'machines': machines, 'image': image}
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url']

134

135

136

def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
    """Run the server job with server-side packaging.

    Sets up a test container, rebuilds the current autoserv command line with
    host paths replaced by their in-container counterparts, runs it inside the
    container, reports success/failure and destroys the container.

    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
                                       control=control)
    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        # Remove the flag together with the parent job id that follows it.
        del args[index:index + 2]

    # Results directory as seen from inside the container. It is computed once
    # so the parse_job mapping below never depends on a variable that is only
    # bound when parser.options.results is set.
    container_result_dir = lxc.RESULT_DIR_FMT % job_id

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # Arguments containing a space are wrapped in single quotes.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    finally:
        try:
            counter_key = '%s.%s' % (lxc.STATS_KEY,
                                     'success' if success else 'fail')
            autotest_stats.Counter(counter_key).increment()
            # metadata is uploaded separately so it can use http to upload.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_id,
                        'success': success}
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when the reporting above fails.
            test_container.destroy()

212

213

Dan Shi

2015-04-21 11:11:06 -0700

[diff] [blame]

214

def correct_results_folder_permission(results):
    """Give the results folder to the user and group running autoserv.

    For tests running with server-side packaging, the results folder has the
    owner of root. This must be changed to the user running the autoserv
    process, so parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder. Nothing is done when it is
                    empty or None.
    """
    if results:
        # Owner first, then group, each applied recursively through sudo.
        ownership_cmds = (
                ('sudo -n chown -R %s "%s"', os.getuid),
                ('sudo -n chgrp -R %s "%s"', os.getgid),
        )
        for cmd_fmt, get_owner_id in ownership_cmds:
            utils.run(cmd_fmt % (get_owner_id(), results))

Dan Shi

2015-04-21 11:11:06 -0700

[diff] [blame]

231

232

Dan Shi

2015-04-07 10:10:52 -0700

[diff] [blame]

233

def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Redirects stdin to /dev/null, puts the process into its own process group,
    installs the SIGTERM/SIGTTOU/SIGALRM handlers, builds a server_job from the
    parsed options and runs the requested task (repair, verify, provision,
    reset, cleanup, or the control file). The function never returns normally:
    it always ends in sys.exit().

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.

    @raises SystemExit: Always. The exit status is 0 when the task finished
                        without raising and 1 when any exception was caught.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))

    # Implement SIGTERM handler
    # The handler is nested so that it can close over pid_file_manager,
    # results, use_ssp, job_or_task_id and container_name.
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            # Record of the abort that is posted below; 'success' is flipped
            # to False when the container is missing or destroying it raises.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % sys.exc_info()
                # Handle any exception so the autoserv process can be aborted.
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, sys.exc_info())
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # SIGKILL the process group this process belongs to (created by
        # os.setpgrp() above), which includes this process itself.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Copy the parsed command line options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    # NOTE(review): no_tee is read here but not used anywhere else in this
    # function.
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # Fall back to the parse_job value when no execution tag was given.
    if not execution_tag:
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    # Special tasks do not need a control file argument.
    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    # Optional keyword arguments for server_job; parent_job_id and
    # control_filename are only passed when they were given.
    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    # The job object is created regardless of use_ssp; it is also used below
    # for host clean-up and for reporting num_tests_failed.
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            # Only the first matching task runs, in this order of precedence:
            # repair, verify, provision, reset, cleanup, then a regular run.
            if repair:
                job.repair(host_protection, job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if use_ssp:
                    try:
                        _run_with_ssp(container_name, job_or_task_id, results,
                                      parser, ssp_url)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels,
                            use_packaging=(not no_use_packaging))
        finally:
            # Close every host still attached to the job, whether or not the
            # task raised.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Any exception from the task (bare except) is printed and turned
        # into exit status 1.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)

mbligh

2008-05-01 20:00:01 +0000

[diff] [blame]

457

458

Fang Deng

2014-10-23 13:56:41 -0700

[diff] [blame]

459

def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. When no machine
    is given, the duration is recorded against the host name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    hostname = machines[0] if machines else 'hostless'

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    # Read the task flags straight off the options object instead of parsing
    # str(options) back into a dict, which breaks as soon as any option value
    # has a repr that is not a Python literal. The explicit comparison with
    # True is kept so that only a real boolean flag selects a task.
    match = [task for task in task_mapping
             if getattr(options, task, None) == True]
    status = task_mapping[match[0]] if match else s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostname, status, duration_secs,
            is_special_task=is_special_task)

490

491

mbligh

2008-05-01 20:00:01 +0000

[diff] [blame]

492

def main():
    """Entry point of autoserv.

    Parses the command line, optionally stages the server-side package (SSP),
    prepares the results directory and logging, optionally opens a pidfile and
    starts a run-time timer, then either runs the job through run_autoserv()
    or, in testing mode, mocks the results. The total duration is recorded via
    record_autoserv() before the process exits with the resulting exit code.
    """
    start_time = datetime.datetime.now()
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = global_config.global_config.get_config_value(
                        'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage server-
    # side package first. If that fails with error that autotest server package
    # does not exist, fall back to run the job without using server-side
    # packaging. If option warn_no_ssp is specified, that means autoserv is
    # running in a drone that does not support SSP, thus no need to stage
    # server-side package.
    ssp_url = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url = _stage_ssp(parser)
        # The build does not have autotest server package. Fall back to not
        # using server-side package. Logging is postponed until logging has
        # been set up.
        ssp_url_warning = not ssp_url

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone that does not support SSP and a warning will be
    # logged. Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host label of '
                'cros-version. The test will be executed without '
                'server-side packaging supported.')

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it gets properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    timer = None
    # test_name is only looked up when a control file and machines are given,
    # but it is read again below in testing mode. Bind it up front so that the
    # 'unknown-test' fallback is used instead of raising UnboundLocalError.
    test_name = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            try:
                test_name = control_data.parse_control(parser.args[0],
                                                       raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(',', ' '
                                                           ).strip().split()
                try:
                    afe = frontend.AFE()
                    board = server_utils.get_board_from_afe(machines[0], afe)
                    timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
                                                 (board, test_name))
                    timer.start()
                except (urllib2.HTTPError, urllib2.URLError):
                    # Ignore error if RPC failed to get board
                    pass
    except control_data.ControlVariableException as e:
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = global_config.global_config.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = global_config.global_config.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Testing mode additionally requires the results_mocker module to be
    # importable and the job label not to match any testing exception.
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)

mbligh

faf0cd4

2007-11-19 16:00:24 +0000

[diff] [blame]

684

mbligh

bb42185

2008-03-11 22:36:16 +0000

[diff] [blame]

685

mbligh

2008-05-01 20:00:01 +0000

[diff] [blame]

686

if __name__ == '__main__':

jadmanski